From c4ba161dbcc3a1dc1fdee697a378aaf3c5417cb0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 6 Mar 2025 10:05:16 +0100 Subject: [PATCH 0001/1764] rANS: Suppress bogus compiler warnings, and add / improve some comments --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 1 + Utilities/rANS/benchmarks/bench_ransDecode.cxx | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 3c118f402dc4f..e85a3c3e9e1f3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -416,7 +416,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mInternals = master->mInternals; GPUFailedMsg(cudaSetDevice(mDeviceId)); - GPUInfo("CUDA Initialized from master"); + GPUInfo("CUDA Initialisation successfull (from master)"); } for (uint32_t i = 0; i < mEvents.size(); i++) { diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h index 7117dd0c718c6..c7a1b4f55e501 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h @@ -21,7 +21,7 @@ #include #include #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wshadow" // FIXME: Is this still needed? 
#include #include #include diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 2d73279cf1fe7..64d8549312736 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -114,6 +114,7 @@ class GPUTPCGMMergedTrack float mLastY; //* outer Y float mLastZ; //* outer Z uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays + // TODO: Change to 8 bit uint32_t mNClusters; //* number of track clusters uint32_t mNClustersFitted; //* number of clusters used in fit uint8_t mFlags; diff --git a/Utilities/rANS/benchmarks/bench_ransDecode.cxx b/Utilities/rANS/benchmarks/bench_ransDecode.cxx index a642b58a3431d..0a11b042a77ee 100644 --- a/Utilities/rANS/benchmarks/bench_ransDecode.cxx +++ b/Utilities/rANS/benchmarks/bench_ransDecode.cxx @@ -112,8 +112,11 @@ void ransDecodeBenchmark(benchmark::State& st, Args&&... args) using input_data_type = std::remove_cv_t>; using source_type = typename input_data_type::value_type; +#pragma GCC diagnostic push // TODO: Remove me when fixed in GCC +#pragma GCC diagnostic ignored "-Walloc-size-larger-than=" EncodeBuffer encodeBuffer{inputData.size()}; DecodeBuffer decodeBuffer{inputData.size()}; +#pragma GCC diagnostic pop const auto histogram = makeDenseHistogram::fromSamples(gsl::span(inputData)); Metrics metrics{histogram}; From 0bd7fb7f7f882973aaa431a593b285082550971e Mon Sep 17 00:00:00 2001 From: Maximilian Korwieser Date: Fri, 31 Jan 2025 13:04:55 +0100 Subject: [PATCH 0002/1764] [TPC-QC]Add functionality to set the coordinates of PV. 
--- Detectors/TPC/qc/include/TPCQC/Tracks.h | 23 +++++++++++++++-------- Detectors/TPC/qc/src/Tracks.cxx | 3 +-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Detectors/TPC/qc/include/TPCQC/Tracks.h b/Detectors/TPC/qc/include/TPCQC/Tracks.h index f6cff7c2b8090..ff215b68ce3b1 100644 --- a/Detectors/TPC/qc/include/TPCQC/Tracks.h +++ b/Detectors/TPC/qc/include/TPCQC/Tracks.h @@ -81,19 +81,26 @@ class Tracks mUseCutMaxAbsDCArOnHistos = useCutMaxAbsDCArOnHistos; } + // Set PV position + void setPVposition(const o2::math_utils::Point3D meanVtxPoint3D) + { + mPositionOfPV = meanVtxPoint3D; + } + /// get ratios of 1D histograms std::unordered_map>& getMapHist() { return mMapHist; } const std::unordered_map>& getMapHist() const { return mMapHist; } private: - float mCutAbsEta = 1.f; // Eta cut - int mCutMinnCls = 60; // minimum N clusters - float mCutMindEdxTot = 20.f; // dEdxTot min value - float mCutMinPtDCAr = 1.5f; // minimum pT for DCAr plots DCAr vs. phi, eta, nCluster - float mSamplingFractionDCAr = 0.1f; // sampling rate for calculation of DCAr - bool mTurnOffHistosForAsync = false; // Decide whether to turn off some histograms for async to reduce memory - float mCutMaxAbsDCAr = 1.f; // maximum DCAr - bool mUseCutMaxAbsDCArOnHistos = false; // Decide whether to use the cut on maximum DCAr for the histograms + float mCutAbsEta = 1.f; // Eta cut + int mCutMinnCls = 60; // minimum N clusters + float mCutMindEdxTot = 20.f; // dEdxTot min value + float mCutMinPtDCAr = 1.5f; // minimum pT for DCAr plots DCAr vs. 
phi, eta, nCluster + float mSamplingFractionDCAr = 0.1f; // sampling rate for calculation of DCAr + bool mTurnOffHistosForAsync = false; // Decide whether to turn off some histograms for async to reduce memory + float mCutMaxAbsDCAr = 1.f; // maximum DCAr + bool mUseCutMaxAbsDCArOnHistos = false; // Decide whether to use the cut on maximum DCAr for the histograms + o2::math_utils::Point3D mPositionOfPV{}; // Position of the PV std::unordered_map> mMapHist; std::vector mHist1D{}; ///< Initialize vector of 1D histograms diff --git a/Detectors/TPC/qc/src/Tracks.cxx b/Detectors/TPC/qc/src/Tracks.cxx index 9f1d9aabf9523..8e6f0d702df1b 100644 --- a/Detectors/TPC/qc/src/Tracks.cxx +++ b/Detectors/TPC/qc/src/Tracks.cxx @@ -180,9 +180,8 @@ bool Tracks::processTrack(const o2::tpc::TrackTPC& track) if (propagator->getMatLUT() && propagator->hasMagFieldSet()) { // ---| fill DCA histos |--- o2::gpu::gpustd::array dca; - const o2::math_utils::Point3D refPoint{0, 0, 0}; o2::track::TrackPar propTrack(track); - if (propagator->propagateToDCABxByBz(refPoint, propTrack, 2.f, o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca)) { + if (propagator->propagateToDCABxByBz(mPositionOfPV, propTrack, 2.f, o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca)) { const auto phi = o2::math_utils::to02PiGen(track.getPhi()); dcaHistPT->Fill(pt, dca[0]); dcaHist->Fill(phi, dca[0]); From b6432870f6504a8005d8b70d24e596341679859c Mon Sep 17 00:00:00 2001 From: wiechula Date: Tue, 4 Mar 2025 18:57:12 +0100 Subject: [PATCH 0003/1764] TPC: Cluster filter for PbPb 2023 --- GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx | 46 ++++++++++++++++++- GPU/GPUTracking/Debug/GPUTPCClusterFilter.h | 26 ++++++++++- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 10 ++-- .../Global/GPUChainTrackingClusterizer.cxx | 4 +- .../Global/GPUChainTrackingCompression.cxx | 6 ++- .../GPUChainTrackingDebugAndProfiling.cxx | 5 +- 7 files changed, 83 insertions(+), 16 
deletions(-) diff --git a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx index cdd0e4879f949..92adcbd9c14d7 100644 --- a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx +++ b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx @@ -12,14 +12,47 @@ /// \file GPUTPCClusterFilter.cxx /// \author David Rohr +#include "GPUCommonLogger.h" #include "GPUTPCClusterFilter.h" #include "DataFormatsTPC/ClusterNative.h" using namespace o2::gpu; -GPUTPCClusterFilter::GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters) +GPUTPCClusterFilter::GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters, uint8_t filterType) + : mFilterType(filterType) { - // Could initialize private variables based on the clusters here + if (filterType == 1) { + // Custom filter settings go here + + } else if (filterType == 2) { + // PbPb23 filter + mClusterStats = std::make_unique[]>(MaxStacks); + static bool called = false; + if (!called) { + LOGP(info, "GPUTPCClusterFilter called for PbPb 2023 settings"); + called = true; + } + + for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { + for (uint32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) { + const uint32_t globalStack = getGlobalStack(iSector, iRow); + mClusterStats[globalStack].resize(MaxTimeBin); + + for (uint32_t k = 0; k < clusters.nClusters[iSector][iRow]; k++) { + const o2::tpc::ClusterNative& cl = clusters.clusters[iSector][iRow][k]; + const int clTime = static_cast(cl.getTime()); + const float clQmax = cl.getQmax(); + + if (clQmax < 12) { + if (clTime >= static_cast(mClusterStats[globalStack].size())) { + mClusterStats[globalStack].resize(mClusterStats[globalStack].size() + 445); + } + ++mClusterStats[globalStack][clTime]; + } + } + } + } + } } bool GPUTPCClusterFilter::filter(uint32_t sector, uint32_t row, o2::tpc::ClusterNative& cl) @@ -27,5 +60,14 @@ bool GPUTPCClusterFilter::filter(uint32_t sector, uint32_t row, o2::tpc::Cluster // Return true to keep the cluster, 
false to drop it. // May change cluster properties by modifying the cl reference. // Note that this function might be called multiple times for the same cluster, in which case the final modified cl reference goes into the output clusters. + if (mFilterType == 2) { + const uint32_t globalStack = getGlobalStack(sector, row); + const int clTime = static_cast(cl.getTime()); + const float clQmax = cl.getQmax(); + if ((mClusterStats[globalStack][clTime] > 40 && clQmax < 12) || (mClusterStats[globalStack][clTime] > 200)) { + return false; + } + } + return true; } diff --git a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h index 908f78fd23b9a..b39237757de53 100644 --- a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h +++ b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h @@ -15,7 +15,10 @@ #ifndef GPUTPCCLUSTERFILTER_H #define GPUTPCCLUSTERFILTER_H +#include #include +#include +#include "GPUDefConstantsAndSettings.h" namespace o2::tpc { @@ -28,8 +31,29 @@ namespace o2::gpu class GPUTPCClusterFilter { public: - GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters); + GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters, uint8_t filterType); bool filter(uint32_t sector, uint32_t row, o2::tpc::ClusterNative& cl); + + private: + static constexpr uint32_t MaxTimeBin = 14256; + static constexpr uint32_t MaxStacks = GPUCA_NSECTORS * 4; + uint8_t mFilterType = 0; //< 0: off, 1: custom, 2: PbPb23 + + std::unique_ptr[]> mClusterStats; //< Number of clusters per stack and time bin + + uint32_t getGlobalStack(uint32_t sector, uint32_t row) const + { + int stack = 3; + if (row < 63) { + stack = 0; + } else if (row < 97) { + stack = 1; + } else if (row < 127) { + stack = 2; + } + + return sector * 4 + stack; + }; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 25419f3483dd6..3c31a4fbb8409 100644 --- 
a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -291,7 +291,7 @@ AddOption(tpcDownscaledEdx, uint8_t, 0, "", 0, "If != 0, downscale dEdx processi AddOption(tpcMaxAttachedClustersPerSectorRow, uint32_t, 51000, "", 0, "Maximum number of TPC attached clusters which can be decoded per SectorRow") AddOption(tpcUseOldCPUDecoding, bool, false, "", 0, "Enable old CPU-based TPC decoding") AddOption(tpcApplyCFCutsAtDecoding, bool, false, "", 0, "Apply cluster cuts from clusterization during decoding of compressed clusters") -AddOption(tpcApplyDebugClusterFilter, bool, false, "", 0, "Apply custom cluster filter of GPUTPCClusterFilter class") +AddOption(tpcApplyClusterFilterOnCPU, uint8_t, 0, "", 0, "Apply custom cluster filter of GPUTPCClusterFilter class, 0: off, 1: debug, 2: PbPb23") AddOption(RTCcacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored") AddOption(RTCprependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(RTCoverrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 9de8b2174a182..a63886b93ccf9 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -294,16 +294,12 @@ bool GPUChainTracking::ValidateSettings() return false; } } - if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCDecompression) && GetProcessingSettings().tpcApplyCFCutsAtDecoding && !GetProcessingSettings().tpcUseOldCPUDecoding) { - GPUError("tpcApplyCFCutsAtDecoding currently requires tpcUseOldCPUDecoding"); - return false; - } if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (GetProcessingSettings().tpcCompressionGatherMode == 1 || 
GetProcessingSettings().tpcCompressionGatherMode == 3)) { GPUError("Invalid tpcCompressionGatherMode for compression on CPU"); return false; } - if (GetProcessingSettings().tpcApplyDebugClusterFilter == 1 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding || GetProcessingSettings().delayedOutput || GetProcessingSettings().runMC)) { - GPUError("tpcApplyDebugClusterFilter cannot be used with GPU clusterization or with delayedOutput for GPU or with MC labels"); + if (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding || GetProcessingSettings().delayedOutput || GetProcessingSettings().runMC)) { + GPUError("tpcApplyClusterFilterOnCPU cannot be used with GPU clusterization or with delayedOutput for GPU or with MC labels"); return false; } if (GetRecoSteps() & RecoStep::TRDTracking) { @@ -815,7 +811,7 @@ int32_t GPUChainTracking::RunChainFinalize() PrintDebugOutput(); - //PrintMemoryRelations(); + // PrintMemoryRelations(); if (GetProcessingSettings().eventDisplay) { if (!mDisplayRunning) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 121d60873324f..62a4a524d67df 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -635,7 +635,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (mWaitForFinalInputs) { GPUFatal("Cannot use waitForFinalInput callback without delayed output"); } - if (!GetProcessingSettings().tpcApplyDebugClusterFilter) { + if (!GetProcessingSettings().tpcApplyClusterFilterOnCPU) { AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]); tmpNativeClusters = mInputsHost->mPclusterNativeOutput; } else { @@ -1021,7 +1021,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) 
tmpNativeAccess->clustersMCTruth = mcLabelsConstView; tmpNativeAccess->setOffsetPtrs(); mIOPtrs.clustersNative = tmpNativeAccess; - if (GetProcessingSettings().tpcApplyDebugClusterFilter) { + if (GetProcessingSettings().tpcApplyClusterFilterOnCPU) { auto allocator = [this, &tmpNativeClusters](size_t size) { this->mInputsHost->mNClusterNative = size; this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 94d39249d620c..1b08de21abd0f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -201,6 +201,11 @@ int32_t GPUChainTracking::RunTPCCompression() int32_t GPUChainTracking::RunTPCDecompression() { + const bool runFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0); + if (runFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { + GPUFatal("tpcApplyCFCutsAtDecoding, tpcApplyClusterFilterOnCPU and tpcCutTimeBin currently require tpcUseOldCPUDecoding"); + } + if (GetProcessingSettings().tpcUseOldCPUDecoding) { const auto& threadContext = GetThreadContext(); TPCClusterDecompressor decomp; @@ -214,7 +219,6 @@ int32_t GPUChainTracking::RunTPCDecompression() return ((tmpBuffer = std::make_unique(size))).get(); }; auto& decompressTimer = getTimer("TPCDecompression", 0); - bool runFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding; auto allocatorUse = runFiltering ? 
std::function{allocatorTmp} : std::function{allocatorFinal}; decompressTimer.Start(); if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index ec6b48a55d50d..38e1cd0036c16 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -295,7 +295,8 @@ void GPUChainTracking::SanityCheck() void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts) { - GPUTPCClusterFilter clusterFilter(*clusters); + const uint8_t filterType = GetProcessingSettings().tpcApplyClusterFilterOnCPU; + GPUTPCClusterFilter clusterFilter(*clusters, filterType); o2::tpc::ClusterNative* outputBuffer = nullptr; for (int32_t iPhase = 0; iPhase < 2; iPhase++) { uint32_t countTotal = 0; @@ -312,7 +313,7 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster if (param().tpcCutTimeBin > 0) { keep = keep && cl.getTime() < param().tpcCutTimeBin; } - keep = keep && (!GetProcessingSettings().tpcApplyDebugClusterFilter || clusterFilter.filter(iSector, iRow, cl)); + keep = keep && (!filterType || clusterFilter.filter(iSector, iRow, cl)); if (iPhase && keep) { outputBuffer[countTotal] = cl; } From 23958247bf8d01cdf23e053c7e4886c87d834041 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Thu, 6 Mar 2025 17:40:08 +0100 Subject: [PATCH 0004/1764] TPC: fix crash in MIPTrackFilterDevice when skipping first TF --- Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx index b8b95090d8534..1329dea236b1f 100644 --- 
a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx +++ b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx @@ -106,6 +106,7 @@ void MIPTrackFilterDevice::init(framework::InitContext& ic) void MIPTrackFilterDevice::run(ProcessingContext& pc) { + o2::base::GRPGeomHelper::instance().checkUpdates(pc); const auto currentTF = processing_helpers::getCurrentTF(pc); if ((mTFCounter++ % mProcessEveryNthTF) && (currentTF >= mProcessNFirstTFs)) { LOGP(info, "Skipping TF {}", currentTF); @@ -115,7 +116,6 @@ void MIPTrackFilterDevice::run(ProcessingContext& pc) } return; } - o2::base::GRPGeomHelper::instance().checkUpdates(pc); const auto tracks = pc.inputs().get>("tracks"); const auto nTracks = tracks.size(); From 21d7d164517eb16042b7b86253c933e5cec191ca Mon Sep 17 00:00:00 2001 From: pillot Date: Fri, 7 Mar 2025 12:54:39 +0100 Subject: [PATCH 0005/1764] new executable to scan HV/LV CCDB objects and look for issues (#14022) --- Detectors/MUON/MCH/Conditions/CMakeLists.txt | 12 + Detectors/MUON/MCH/Conditions/README.md | 53 +- .../MCH/Conditions/src/scan-hvlv-ccdb.cxx | 1121 +++++++++++++++++ 3 files changed, 1184 insertions(+), 2 deletions(-) create mode 100644 Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx diff --git a/Detectors/MUON/MCH/Conditions/CMakeLists.txt b/Detectors/MUON/MCH/Conditions/CMakeLists.txt index f0a84dec6a416..eff44df743df5 100644 --- a/Detectors/MUON/MCH/Conditions/CMakeLists.txt +++ b/Detectors/MUON/MCH/Conditions/CMakeLists.txt @@ -37,6 +37,18 @@ o2_add_executable( O2::MCHGlobalMapping ) +o2_add_executable( + scan-hvlv-ccdb + COMPONENT_NAME mch + SOURCES src/scan-hvlv-ccdb.cxx + PUBLIC_LINK_LIBRARIES + O2::CCDB + O2::CommonUtils + O2::DetectorsDCS + O2::MCHConditions + O2::MCHStatus + ) + if(BUILD_TESTING) o2_add_test( dcs-aliases diff --git a/Detectors/MUON/MCH/Conditions/README.md b/Detectors/MUON/MCH/Conditions/README.md index fda90ba8c9ca9..d35fdcd0a0958 100644 --- a/Detectors/MUON/MCH/Conditions/README.md +++ 
b/Detectors/MUON/MCH/Conditions/README.md @@ -17,7 +17,9 @@ Those objects are stored at the following CCDB paths : - MCH/Calib/BadChannel - MCH/Calib/RejectList -The BadChannel and RejectList objects can be uploaded, e.g. for debug purposes, using the `o2-mch-bad-channels-ccdb` program : +# o2-mch-bad-channels-ccdb + +The BadChannel and RejectList objects can be uploaded, e.g. for debug purposes, using the `o2-mch-bad-channels-ccdb` program: ```shell $ o2-mch-bad-channels-ccdb --help @@ -46,10 +48,57 @@ Usage: -a [ --alias ] arg DCS alias (HV or LV) to reject ``` -For instance, to create in a local CCDB a RejectList object which declares solar number 32 as bad, from Tuesday 1 November 2022 00:00:01 UTC to Saturday 31 December 2022 23:59:59, use : +For instance, to create in a local CCDB a RejectList object which declares solar number 32 as bad, from Tuesday 1 November 2022 00:00:01 UTC to Saturday 31 December 2022 23:59:59, use: ```shell $ o2-mch-bad-channels-ccdb -p -s 32 -t RejectList --starttimestamp 1667260801000 --endtimestamp 1672531199000 ``` The program will search the reference CCDB (defined with `--referenceccdb`) for existing objects valid during this period and propose you to either overwrite them or update them. In the first case, a single object will be created, valid for the whole period, containing only the new bad channels. In the second case, as many objects as necessary will be created with appropriate validity ranges, adding the new bad channels to the existing ones. 
+ +# o2-mch-scan-hvlv-ccdb + +The HV or LV DCS datapoints stored in the CCDB (http://alice-ccdb.cern.ch) can be scanned using the `o2-mch-scan-hvlv-ccdb` program: + +```shell +$ o2-mch-scan-hvlv-ccdb -h +This program scans HV or LV channels looking for issues +Usage: + -h [ --help ] produce help message + -r [ --runs ] arg run(s) to scan (comma separated list of runs + or ASCII file with one run per line) + -c [ --channels ] arg channel(s) to scan ("HV" or "LV" or comma + separated list of (part of) DCS aliases) + --configKeyValues arg Semicolon separated key=value strings to + change HV thresholds + -d [ --duration ] arg (=0) minimum duration (ms) of HV/LV issues to + consider + -w [ --warning ] arg (=1) warning level (0, 1 or 2) + -p [ --print ] arg (=1) print level (0, 1, 2 or 3) + -o [ --output ] arg (=scan.root) output root file name +``` + +It takes as input a list of runs and a list of either HV or LV channels to scan. **Note that it will scan the CCDB from the beginning of the first run to the end of the last one, which can represent quite a lot of files.** More details about the options are given below. + +It produces as output a list of detected issues, with time, duration and affected runs, and a root file with the displays of the data points per channel per chamber for a visual inspection. Issues are triggered when HV/LV values go below a given threshold. For HV channels it also compares the issues found by the internal algorithm with the ones found by [Detectors/MUON/MCH/Status/src/HVStatusCreator.cxx](../Status/src/HVStatusCreator.cxx). 
+ +For instance, to scan all HV channels for runs 545222 and 545223 and detect issues of a minimum duration of 10s, use: +```shell +o2-mch-scan-hvlv-ccdb -r 545222,545223 -c HV -d 10000 +``` + +### channel input formats: +* "HV" to scan all HV channels +* "LV" to scan all LV channels +* comma separated list of (part of) DCS aliases, which must be all of the same type, i.e. contain either Quad/Slat (type = HV), or Group/an/di/Sol (type = LV) + +### warning levels: +* 0: no warning +* 1: check data points timestamp w.r.t. HV/LV file validity range with ±5s tolerance +* 2: check data points timestamp w.r.t. HV/LV file validity range without tolerance + +### print levels: +* 0: print detected issues +* 1: same as 0 + print validity range of runs and HV/LV files +* 2: same as 1 + print the first and last data points of each selected channel +* 3: same as 1 + print all the data points of each selected channel diff --git a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx new file mode 100644 index 0000000000000..32cd365916c63 --- /dev/null +++ b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx @@ -0,0 +1,1121 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "TFile.h" +#include "TCanvas.h" +#include "TGraph.h" +#include "TH1F.h" +#include "TLine.h" +#include "TMultiGraph.h" +#include "TStyle.h" + +#include "CCDB/BasicCCDBManager.h" +#include "CCDB/CcdbApi.h" +#include "CommonUtils/ConfigurableParam.h" +#include "DetectorsDCS/DataPointIdentifier.h" +#include "DetectorsDCS/DataPointValue.h" +#include "MCHConditions/DCSAliases.h" +#include "MCHStatus/HVStatusCreator.h" +#include "MCHStatus/StatusMapCreatorParam.h" + +namespace po = boost::program_options; + +using namespace o2; +using DPID = dcs::DataPointIdentifier; +using DPVAL = dcs::DataPointValue; +using DPMAP = std::unordered_map>; +using DPMAP2 = std::map>; +using RBMAP = std::map>; +using DPBMAP = std::map; +using ISSUE = std::tuple; +using ISSUELIST = std::vector; +using ISSUEMAP = std::map; + +//---------------------------------------------------------------------------- +bool containsAKey(std::string data, const std::set& Keys) +{ + /// check if the data contains one of the keys + + auto itKey = std::find_if(Keys.begin(), Keys.end(), [&data](const auto& key) { + return data.find(key) != data.npos; + }); + + return itKey != Keys.end(); +} + +//---------------------------------------------------------------------------- +bool isValid(std::string alias) +{ + /// check if the alias is a valid (part of a) DCS alias + + static const std::vector aliases = + mch::dcs::aliases({mch::dcs::MeasurementType::HV_V, + mch::dcs::MeasurementType::LV_V_FEE_ANALOG, + mch::dcs::MeasurementType::LV_V_FEE_DIGITAL, + mch::dcs::MeasurementType::LV_V_SOLAR}); + + auto itAlias = std::find_if(aliases.begin(), aliases.end(), [&alias](const auto& a) { + return a.find(alias) != a.npos; + }); + + return itAlias != aliases.end(); +} + +//---------------------------------------------------------------------------- +void 
scanWhat(std::string what, std::string& path, bool& scanHV, bool& scanAll, std::set& aliases) +{ + /// get what to scan and where + + static const std::set hvKeys{"HV", "Quad", "Slat"}; + static const std::set lvKeys{"LV", "Group", "an", "di", "Sol"}; + + // HV or LV ? + path = ""; + scanHV = false; + if (containsAKey(what, hvKeys)) { + path = "MCH/Calib/HV"; + scanHV = true; + } + if (containsAKey(what, lvKeys)) { + if (scanHV) { + printf("error: cannot scan HV and LV channels at the same time\n"); + exit(1); + } + path = "MCH/Calib/LV"; + } + if (path.empty()) { + printf("error: no valid HV or LV channel to scan\n"); + exit(1); + } + + // everything or specific aliases ? + if (what.find(scanHV ? "HV" : "LV") != what.npos) { + scanAll = true; + aliases.clear(); + } else { + scanAll = false; + std::istringstream input(what); + for (std::string alias; std::getline(input, alias, ',');) { + if (isValid(alias)) { + aliases.insert(alias); + } else { + printf("error: \"%s\" invalid (part of) HV or LV alias\n", alias.c_str()); + exit(1); + } + } + } +} + +//---------------------------------------------------------------------------- +uint64_t ms2s(uint64_t ts) +{ + /// convert the time stamp from ms to s + + return (ts + 500) / 1000; +} + +//---------------------------------------------------------------------------- +std::string getTime(uint64_t ts) +{ + /// convert the time stamp (ms) to local time + + time_t t = ms2s(ts); + + std::string time = std::ctime(&t); + time.pop_back(); // remove trailing \n + + return time; +} + +//---------------------------------------------------------------------------- +std::set getRuns(std::string runList) +{ + /// read the runList from an ASCII file, or a comma separated run list, or a single run + + std::set runs{}; + + auto isNumber = [](std::string val) { return !val.empty() && val.find_first_not_of("0123456789") == val.npos; }; + + if (isNumber(runList)) { + + runs.insert(std::stoi(runList)); + + } else if (runList.find(",") != 
runList.npos) { + + std::istringstream input(runList); + for (std::string run; std::getline(input, run, ',');) { + if (isNumber(run)) { + runs.insert(std::stoi(run)); + } + } + + } else { + + std::ifstream input(runList); + if (input.is_open()) { + for (std::string run; std::getline(input, run);) { + if (isNumber(run)) { + runs.insert(std::stoi(run)); + } + } + } + } + + return runs; +} + +//---------------------------------------------------------------------------- +RBMAP getRunBoundaries(ccdb::CcdbApi const& api, std::string runList) +{ + /// return the SOR / EOR time stamps for every runs in the list + + RBMAP runBoundaries{}; + + auto runs = getRuns(runList); + + for (auto run : runs) { + auto boundaries = ccdb::CCDBManagerInstance::getRunDuration(api, run); + runBoundaries.emplace(run, boundaries); + } + + return runBoundaries; +} + +//---------------------------------------------------------------------------- +void checkRunBoundaries(const RBMAP& runBoundaries) +{ + /// check the consistency of the run time boundaries + + if (runBoundaries.empty()) { + printf("error: no run found from the list\n"); + exit(1); + } + + bool error = false; + int previousRun = 0; + uint64_t endOfPreviousRun = 0; + + for (const auto& [run, boundaries] : runBoundaries) { + if (boundaries.second <= boundaries.first) { + printf("error: run %d EOR <= SOR: %llu - %llu (%s - %s)\n", + run, boundaries.first, boundaries.second, + getTime(boundaries.first).c_str(), getTime(boundaries.second).c_str()); + error = true; + } + if (boundaries.first <= endOfPreviousRun) { + printf("error: SOR run %d <= EOR run %d: %llu (%s) <= %llu (%s)\n", + run, previousRun, boundaries.first, getTime(boundaries.first).c_str(), + endOfPreviousRun, getTime(endOfPreviousRun).c_str()); + error = true; + } + previousRun = run; + endOfPreviousRun = boundaries.second; + } + + if (error) { + exit(1); + } +} + +//---------------------------------------------------------------------------- +void 
printRunBoundaries(const RBMAP& runBoundaries) +{ + /// print the list of runs with their time boundaries + + printf("\nlist of runs with their boundaries:\n"); + printf("------------------------------------\n"); + + for (const auto& [run, boundaries] : runBoundaries) { + printf("%d: %llu - %llu (%s - %s)\n", run, boundaries.first, boundaries.second, + getTime(boundaries.first).c_str(), getTime(boundaries.second).c_str()); + } + + printf("------------------------------------\n"); +} + +//---------------------------------------------------------------------------- +void drawRunBoudaries(const RBMAP& runBoundaries, TCanvas* c) +{ + /// draw the run time boundaries + + c->cd(); + + for (const auto& [run, boundaries] : runBoundaries) { + + TLine* startRunLine = new TLine(ms2s(boundaries.first), c->GetUymin(), ms2s(boundaries.first), c->GetUymax()); + startRunLine->SetUniqueID(run); + startRunLine->SetLineColor(4); + startRunLine->SetLineWidth(1); + startRunLine->Draw(); + + TLine* endRunLine = new TLine(ms2s(boundaries.second), c->GetUymin(), ms2s(boundaries.second), c->GetUymax()); + endRunLine->SetUniqueID(run); + endRunLine->SetLineColor(2); + endRunLine->SetLineWidth(1); + endRunLine->Draw(); + } +} + +//---------------------------------------------------------------------------- +DPBMAP getDPBoundaries(ccdb::CcdbApi const& api, std::string what, uint64_t tStart, uint64_t tStop) +{ + /// get the time boundaries of every HV/LV files found in the time range + + // add extra margin (ms) of ± 1 min to the creation time, which occurs every 30 min + static const uint64_t timeMarging[2] = {60000, 1860000}; + + std::istringstream fileInfo(api.list(what.c_str(), false, "text/plain", + tStop + timeMarging[1], tStart - timeMarging[0])); + + DPBMAP dpBoundaries{}; + std::string dummy{}; + uint64_t begin = 0; + uint64_t end = 0; + + for (std::string line; std::getline(fileInfo, line);) { + if (line.find("Validity:") == 0) { + std::istringstream in(line); + in >> dummy >> begin 
>> dummy >> end; + dpBoundaries.emplace(begin, end); + } + } + + if (dpBoundaries.empty()) { + printf("\e[0;31merror: no file found in %s in time range %llu - %llu (%s - %s) --> use the default one\e[0m\n", + what.c_str(), tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + dpBoundaries.emplace(1, 9999999999999); + } + + return dpBoundaries; +} + +//---------------------------------------------------------------------------- +void checkDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t tStart, uint64_t tStop) +{ + /// check the consistency of HV/LV file time boundaries + + bool error = false; + + if (dpBoundaries.begin()->first > tStart) { + printf("error: the beginning of the time range is not covered: %llu > %llu (%s > %s)\n", + dpBoundaries.begin()->first, tStart, + getTime(dpBoundaries.begin()->first).c_str(), getTime(tStart).c_str()); + error = true; + } + if (dpBoundaries.rbegin()->second < tStop) { + printf("error: the end of the time range is not covered: %llu < %llu (%s < %s)\n", + dpBoundaries.rbegin()->second, tStop, + getTime(dpBoundaries.rbegin()->second).c_str(), getTime(tStop).c_str()); + error = true; + } + + uint64_t previousTStop = dpBoundaries.begin()->first; + for (auto [tStart, tStop] : dpBoundaries) { + if (tStop <= tStart) { + printf("error: EOF <= SOF: %llu - %llu (%s - %s)\n", + tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + error = true; + } + if (tStart != previousTStop) { + printf("error: end of %s file != start of next %s file: %llu (%s) != %llu (%s))\n", + scanHV ? "HV" : "LV", scanHV ? 
"HV" : "LV", + previousTStop, getTime(previousTStop).c_str(), tStart, getTime(tStart).c_str()); + error = true; + } + previousTStop = tStop; + } + + if (error) { + exit(1); + } +} + +//---------------------------------------------------------------------------- +void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV) +{ + /// print the time boundaries of every HV/LV files found in the full time range + + printf("\nlist of %s file time boundaries:\n", scanHV ? "HV" : "LV"); + printf("------------------------------------\n"); + + for (auto [tStart, tStop] : dpBoundaries) { + printf("%llu - %llu (%s - %s)\n", tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + } + + printf("------------------------------------\n"); +} + +//---------------------------------------------------------------------------- +double getLVLimit(std::string alias) +{ + /// return the LV limit for that channel + + static const double lvLimits[3] = {1.5, 1.5, 6.}; // FeeAnalog, FeeDigital, Solar + + if (alias.find("an") != alias.npos) { + return lvLimits[0]; + } else if (alias.find("di") != alias.npos) { + return lvLimits[1]; + } + return lvLimits[2]; +} + +//---------------------------------------------------------------------------- +void drawLimit(double limit, TCanvas* c) +{ + /// draw the HV/LV limit for the displayed chamber + + c->cd(); + + TLine* l = new TLine(c->GetUxmin(), limit, c->GetUxmax(), limit); + l->SetLineColor(1); + l->SetLineWidth(1); + l->SetLineStyle(2); + l->Draw(); +} + +//---------------------------------------------------------------------------- +std::string getDuration(uint64_t tStart, uint64_t tStop) +{ + /// get the duration (dd hh:mm:ss) between the two time stamps (ms) + + auto dt = ms2s(tStop - tStart); + auto s = dt % 60; + auto m = (dt / 60) % 60; + auto h = (dt / 3600) % 24; + auto d = dt / 86400; + + return fmt::format("{:02}d {:02}:{:02}:{:02}", d, h, m, s); +} + 
+//---------------------------------------------------------------------------- +double getValue(DPVAL dp) +{ + /// return the value of this data point + + union Converter { + uint64_t raw_data; + double value; + } converter; + + converter.raw_data = dp.payload_pt1; + + return converter.value; +} + +//---------------------------------------------------------------------------- +std::string getDE(std::string alias) +{ + /// for DCS HV alias: return the corresponding DE (and sector) + /// for DCS LV alias: return an empty string + + auto de = mch::dcs::aliasToDetElemId(alias); + + if (de) { + return (mch::dcs::isQuadrant(mch::dcs::aliasToChamber(alias))) + ? fmt::format("DE{}-{}", *de, mch::dcs::aliasToNumber(alias) % 10) + : fmt::format("DE{}", *de); + } + + return ""; +} + +//---------------------------------------------------------------------------- +void fillDataPoints(const std::vector& dps, std::map& dps2, + uint64_t tMin, uint64_t tMax, int warningLevel) +{ + /// fill the map of data points + + static const uint64_t tolerance = 5000; + + if (dps.empty()) { + printf("error: the file does not contain any data point\n"); + exit(1); + } + + auto itDP = dps.begin(); + auto ts = itDP->get_epoch_time(); + std::string header = "warning:"; + std::string color = (ts + tolerance < tMin || ts > tMin + tolerance) ? "\e[0;31m" : "\e[0;34m"; + bool printWarning = warningLevel > 1 || (warningLevel == 1 && color == "\e[0;31m"); + + // check if the first data point is a copy of the last one from previous file + if (!dps2.empty()) { + auto previousTS = dps2.rbegin()->first; + if (ts != previousTS || getValue(*itDP) != dps2.rbegin()->second) { + if (ts <= previousTS) { + printf("error: wrong data point order (%llu <= %llu)\n", ts, previousTS); + exit(1); + } + if (printWarning) { + printf("%s%s missing the previous data point (dt = %s%llu ms)", color.c_str(), header.c_str(), + (previousTS < tMin) ? "-" : "+", (previousTS < tMin) ? 
tMin - previousTS : previousTS - tMin); + if (ts <= tMin) { + printf(" but get one at dt = -%llu ms\e[0m\n", tMin - ts); + } else { + printf("\e[0m\n"); + } + header = " "; + } + } + } + + // add the first data point (should be before the start of validity of the file) + if (ts >= tMax) { + printf("error: first data point exceeding file validity range (dt = +%llu ms)\n", ts - tMax); + exit(1); + } else if (ts > tMin && printWarning) { + printf("%s%s missing data point prior file start of validity (dt = +%llu ms)\e[0m\n", + color.c_str(), header.c_str(), ts - tMin); + header = " "; + } + dps2.emplace(ts, getValue(*itDP)); + + // add other data points (should be within the validity range of the file) + auto previousTS = ts; + for (++itDP; itDP < dps.end(); ++itDP) { + ts = itDP->get_epoch_time(); + if (ts <= previousTS) { + printf("error: wrong data point order (%llu <= %llu)\n", ts, previousTS); + exit(1); + } + if (ts < tMin && (warningLevel > 1 || (warningLevel == 1 && ts + tolerance < tMin))) { + printf("%s%s data point outside of file validity range (dt = -%llu ms)\e[0m\n", + (ts + tolerance < tMin) ? 
"\e[0;31m" : "\e[0;34m", header.c_str(), tMin - ts); + } else if (ts >= tMax && warningLevel >= 1) { + printf("\e[0;31m%s data point outside of file validity range (dt = +%llu ms)\e[0m\n", + header.c_str(), ts - tMax); + } + dps2.emplace(ts, getValue(*itDP)); + previousTS = ts; + } +} + +//---------------------------------------------------------------------------- +void selectDataPoints(DPMAP2 dpsMapsPerCh[10], uint64_t tStart, uint64_t tStop) +{ + /// remove the data points outside of the given time range and, if needed, + /// add a data point at the boundaries with value equal to the preceding one + + for (int ch = 0; ch < 10; ++ch) { + for (auto& [alias, dps] : dpsMapsPerCh[ch]) { + + // get the first data point in the time range, remove the previous ones + // and add a data point with value equal to the preceding one if it exits + // or to this one otherwise + auto itFirst = dps.lower_bound(tStart); + if (itFirst != dps.begin()) { + double previousVal = std::prev(itFirst)->second; + for (auto it = dps.begin(); it != itFirst;) { + it = dps.erase(it); + } + dps.emplace(tStart, previousVal); + } else if (itFirst->first != tStart) { + if (itFirst->first > tStop) { + printf("error (%s): all data points are posterior to the end of the time range\n", alias.c_str()); + } else { + printf("error (%s): first data point is posterior to the beginning of the time range\n", alias.c_str()); + } + dps.emplace(tStart, itFirst->second); + } + + // get the first data point exceeding the time range, remove it and the next ones + // and add a data point with value equal to the preceding one if needed + auto itLast = dps.upper_bound(tStop); + double previousVal = std::prev(itLast)->second; + for (auto it = itLast; it != dps.end();) { + it = dps.erase(it); + } + dps.emplace(tStop, previousVal); + } + } +} + +//---------------------------------------------------------------------------- +void printDataPoints(const DPMAP2 dpsMapsPerCh[10], std::string hvlvFormat, bool all) +{ + /// 
print all the registered data points + + const auto format1 = fmt::format(" %llu (%s): {} V\n", hvlvFormat.c_str()); + const auto format2 = fmt::format(": %llu (%s): {} V -- %llu (%s): {} V\n", + hvlvFormat.c_str(), hvlvFormat.c_str()); + + for (int ch = 0; ch < 10; ++ch) { + + printf("\n------------ chamber %d ------------\n", ch + 1); + + for (const auto& [alias, dps] : dpsMapsPerCh[ch]) { + + printf("- %s: %lu values", alias.c_str(), dps.size()); + + if (all) { + + printf("\n"); + for (const auto& [ts, val] : dps) { + printf(format1.c_str(), ts, getTime(ts).c_str(), val); + } + + } else if (!dps.empty()) { + + const auto firstdt = dps.begin(); + const auto lastdt = dps.rbegin(); + printf(format2.c_str(), + firstdt->first, getTime(firstdt->first).c_str(), firstdt->second, + lastdt->first, getTime(lastdt->first).c_str(), lastdt->second); + + } else { + printf("\n"); + } + } + } +} + +//---------------------------------------------------------------------------- +TGraph* mapToGraph(std::string alias, const std::map& dps) +{ + /// create a graph for the DCS channel and add the data points + + TGraph* g = new TGraph(dps.size()); + + auto pos = alias.find("."); + auto shortAlias = alias.substr(0, pos); + auto de = getDE(alias); + auto title = de.empty() ? 
fmt::format("{}", shortAlias.c_str()) + : fmt::format("{} ({})", de.c_str(), shortAlias.c_str()); + g->SetNameTitle(alias.c_str(), title.c_str()); + + int i(0); + for (auto [ts, val] : dps) { + g->SetPoint(i, ms2s(ts), val); + ++i; + } + + g->SetMarkerSize(1.5); + g->SetMarkerStyle(2); + g->SetLineStyle(2); + + return g; +} + +//---------------------------------------------------------------------------- +TCanvas* drawDataPoints(TMultiGraph* mg, double min, double max) +{ + /// display the data points of the given chamber + + TCanvas* c = new TCanvas(mg->GetName(), mg->GetHistogram()->GetTitle(), 1500, 900); + + mg->Draw("A plc pmc"); + mg->SetMinimum(min); + mg->SetMaximum(max); + mg->GetXaxis()->SetTimeDisplay(1); + mg->GetXaxis()->SetTimeFormat("%d/%m %H:%M"); + mg->GetXaxis()->SetTimeOffset(0, "local"); + mg->GetXaxis()->SetNdivisions(21010); + + c->BuildLegend(); + c->Update(); + + return c; +} + +//---------------------------------------------------------------------------- +void findIssues(const std::map& dps, double limit, ISSUELIST& issues) +{ + /// return the list of HV/LV issues (time range, min value, mean value) for each DCS channel + + uint64_t tStart(0); + double min(0.); + double mean(0.); + uint64_t prevTS(0); + double prevVal(-1.); + + for (auto [ts, val] : dps) { + + if (val < limit) { + + if (tStart == 0) { + + // start a new issue... + tStart = ts; + min = val; + mean = 0.; + prevTS = ts; + prevVal = val; + + } else { + + // ... 
or complement the current one + min = std::min(min, val); + mean += prevVal * (ts - prevTS); + prevTS = ts; + prevVal = val; + } + + } else if (tStart > 0) { + + // complete the current issue, if any, and register it + mean += prevVal * (ts - prevTS); + mean /= (ts - tStart); + issues.emplace_back(tStart, ts, min, mean, ""); + tStart = 0; + } + } + + // complete the last issue, if any and its duration is != 0, and register it + if (tStart > 0 && prevTS != tStart) { + mean /= (prevTS - tStart); + issues.emplace_back(tStart, prevTS, min, mean, ""); + } +} + +//---------------------------------------------------------------------------- +void fillO2Issues(const std::vector& o2issues, ISSUELIST& issues, + uint64_t tMin, uint64_t tMax) +{ + /// fill the list of issues from O2 (extend the previous one and/or create new ones) + + // the list must not be empty + if (o2issues.empty()) { + printf("error: O2 returns an empty list of issues\n"); + exit(1); + } + + for (auto itIssue = o2issues.begin(); itIssue != o2issues.end(); ++itIssue) { + + // exclude issues fully outside of the DP file boudaries + if (itIssue->end <= tMin || itIssue->begin >= tMax) { + printf("\e[0;35mwarning: skipping O2 issue outside of file boundaries (%llu - %llu)\e[0m\n", + itIssue->begin, itIssue->end); + continue; + } + + // only the first issue could in principle extend before the start of the DP file, to O + if (itIssue->begin < tMin - mch::StatusMapCreatorParam::Instance().timeMargin && + (itIssue != o2issues.begin() || itIssue->begin != 0)) { + printf("\e[0;35mwarning: O2 returns an issue with uncommon start time (%llu < %llu)\e[0m\n", + itIssue->begin, tMin - mch::StatusMapCreatorParam::Instance().timeMargin); + } + + // only the last issue could in principle extend beyond the end of the DP file, to infinity + if (itIssue->end >= tMax + mch::StatusMapCreatorParam::Instance().timeMargin && + (itIssue != std::prev(o2issues.end()) || itIssue->end != std::numeric_limits::max())) { + 
printf("\e[0;35mwarning: O2 returns an issue with uncommon end time (%llu >= %llu)\e[0m\n", + itIssue->end, tMax + mch::StatusMapCreatorParam::Instance().timeMargin); + } + + // extend the last issue in case of continuity accross the DP files or add a new one, + // restricting their time range within the DP file boundaries + if (itIssue->begin <= tMin && !issues.empty() && std::get<1>(issues.back()) == tMin) { + std::get<1>(issues.back()) = std::min(itIssue->end, tMax); + } else { + issues.emplace_back(std::max(itIssue->begin, tMin), std::min(itIssue->end, tMax), 0., 0., ""); + } + } +} + +//---------------------------------------------------------------------------- +std::string findAffectedRuns(const RBMAP& runBoundaries, uint64_t tStart, uint64_t tStop) +{ + /// return the list of affected runs in this time range + + std::string runs; + + for (const auto& [run, boundaries] : runBoundaries) { + + if (boundaries.second <= tStart) { + continue; + } else if (boundaries.first >= tStop) { + break; + } + + runs += fmt::format("{},", run); + } + + if (!runs.empty()) { + runs.pop_back(); + } + + return runs; +} + +//---------------------------------------------------------------------------- +void selectIssues(ISSUEMAP issuesPerCh[10], const RBMAP& runBoundaries, uint64_t minDuration) +{ + /// select HV/LV issues of a minimum duration (ms) occurring during runs + + for (int ch = 0; ch < 10; ++ch) { + for (auto& issues : issuesPerCh[ch]) { + for (auto itIssue = issues.second.begin(); itIssue != issues.second.end();) { + + auto tStart = std::get<0>(*itIssue); + auto tStop = std::get<1>(*itIssue); + + if (tStop - tStart < minDuration) { + + itIssue = issues.second.erase(itIssue); + + } else { + + auto runs = findAffectedRuns(runBoundaries, tStart, tStop); + + if (runs.empty()) { + + itIssue = issues.second.erase(itIssue); + + } else { + + std::get<4>(*itIssue) = runs; + ++itIssue; + } + } + } + } + } +} + 
+//---------------------------------------------------------------------------- +void selectO2Issues(ISSUEMAP issuesPerCh[10], const RBMAP& runBoundaries) +{ + /// select HV issues from O2 algorithm occurring during runs + /// and restrict the range of issues to the run range + + for (int ch = 0; ch < 10; ++ch) { + for (auto& issues : issuesPerCh[ch]) { + for (auto itIssue = issues.second.begin(); itIssue != issues.second.end();) { + + auto& tStart = std::get<0>(*itIssue); + auto& tStop = std::get<1>(*itIssue); + + auto runs = findAffectedRuns(runBoundaries, tStart, tStop); + + if (runs.empty()) { + + itIssue = issues.second.erase(itIssue); + + } else { + + tStart = std::max(tStart, runBoundaries.begin()->second.first); + tStop = std::min(tStop, runBoundaries.rbegin()->second.second); + std::get<4>(*itIssue) = runs; + ++itIssue; + } + } + } + } +} + +//---------------------------------------------------------------------------- +bool eraseIssue(const ISSUE& issue, ISSUELIST& issues) +{ + /// find an issue with the same time range and associated run list and erase it + /// return true in case of success + + auto itIssue = std::find_if(issues.begin(), issues.end(), [&issue](const auto& i) { + return (std::get<0>(i) == std::get<0>(issue) && + std::get<1>(i) == std::get<1>(issue) && + std::get<4>(i) == std::get<4>(issue)); + }); + + if (itIssue != issues.end()) { + issues.erase(itIssue); + return true; + } + + return false; +} + +//---------------------------------------------------------------------------- +void printIssues(const ISSUEMAP issuesPerCh[10], const ISSUEMAP o2IssuesPerCh[10], + bool scanHV, std::string hvlvFormat) +{ + /// print all HV/LV issues + + // copy the issues so that we can modify them (i.e. 
add empty lists or delete issues after printing) + ISSUEMAP issuesPerChCopy[10]; + ISSUEMAP o2IssuesPerChCopy[10]; + for (int ch = 0; ch < 10; ++ch) { + issuesPerChCopy[ch] = issuesPerCh[ch]; + o2IssuesPerChCopy[ch] = o2IssuesPerCh[ch]; + } + + // make sure that all alias keys in the map o2IssuesPerChCopy are also in issuesPerChCopy in order to + // simplify the loop over all issues from both algorithms and fix the order in which they are printed + for (int ch = 0; ch < 10; ++ch) { + for (const auto& [alias, o2Issues] : o2IssuesPerChCopy[ch]) { + if (!o2Issues.empty()) { + issuesPerChCopy[ch].try_emplace(alias, ISSUELIST{}); + } + } + } + + auto printHeader = [](std::string alias) { + auto de = getDE(alias); + if (de.empty()) { + printf("Problem found for %s:\n", alias.c_str()); + } else { + printf("Problem found for %s (%s):\n", alias.c_str(), de.c_str()); + } + }; + + const auto format = fmt::format("%llu - %llu: %s (duration = %s, min = {} V, mean = {} V) --> run(s) %s\n", + hvlvFormat.c_str(), hvlvFormat.c_str()); + + auto printIssue = [&format](ISSUE issue, std::string color) { + const auto& [tStart, tStop, min, mean, runs] = issue; + printf("%s", color.c_str()); + printf(format.c_str(), tStart, tStop, + getTime(tStart).c_str(), getDuration(tStart, tStop).c_str(), min, mean, runs.c_str()); + printf("\e[0m"); + }; + + if (scanHV) { + printf("\n------ list of issues from \e[0;31mthis macro only\e[0m, \e[0;35mO2 only\e[0m, or \e[0;32mboth\e[0m ------\n"); + } else { + printf("\n------ list of issues ------\n"); + } + + bool foundIssues = false; + + for (int ch = 0; ch < 10; ++ch) { + for (const auto& [alias, issues] : issuesPerChCopy[ch]) { + + auto& o2Issues = o2IssuesPerChCopy[ch][alias]; + + if (!issues.empty() || !o2Issues.empty()) { + + foundIssues = true; + printHeader(alias); + + // print all issues found by this macro + for (const auto& issue : issues) { + // change color if the issue is not found by the O2 algorithm (only for HV) + std::string color = 
(scanHV && !eraseIssue(issue, o2Issues)) ? "\e[0;31m" : "\e[0;32m"; + printIssue(issue, color); + } + + // print other issues found by the O2 algorithm + for (const auto& issue : o2Issues) { + printIssue(issue, "\e[0;35m"); + } + + printf("----------------------------\n"); + } + } + } + + if (!foundIssues) { + printf("----------------------------\n"); + } +} + +//---------------------------------------------------------------------------- +int main(int argc, char** argv) +{ + /// scan HV or LV CCDB objects looking for issues + + std::string runList = ""; + std::string what = ""; + std::string config = ""; + uint64_t minDuration = 0; + int warningLevel = 1; + int printLevel = 1; + std::string outFileName = ""; + + po::options_description usage("Usage"); + // clang-format off + usage.add_options() + ("help,h", "produce help message") + ("runs,r",po::value(&runList)->default_value(""),"run(s) to scan (comma separated list of runs or ASCII file with one run per line)") + ("channels,c",po::value(&what)->default_value(""),R"(channel(s) to scan ("HV" or "LV" or comma separated list of (part of) DCS aliases))") + ("configKeyValues",po::value(&config)->default_value(""),"Semicolon separated key=value strings to change HV thresholds") + ("duration,d",po::value(&minDuration)->default_value(0),"minimum duration (ms) of HV/LV issues to consider") + ("warning,w",po::value(&warningLevel)->default_value(1),"warning level (0, 1 or 2)") + ("print,p",po::value(&printLevel)->default_value(1),"print level (0, 1, 2 or 3)") + ("output,o",po::value(&outFileName)->default_value("scan.root"),"output root file name") + ; + // clang-format on + + po::options_description cmdline; + cmdline.add(usage); + + po::variables_map vm; + po::store(po::command_line_parser(argc, argv).options(cmdline).run(), vm); + + if (vm.count("help")) { + std::cout << "This program scans HV or LV channels looking for issues\n"; + std::cout << usage << "\n"; + return 2; + } + + try { + po::notify(vm); + } catch (const 
po::error& e) { + std::cout << "error: " << e.what() << "\n"; + exit(1); + } + + if (runList.empty()) { + printf("error: you must provide run(s) to scan\n"); + exit(1); + } + + if (what.empty()) { + printf("error: you must provide channel(s) to scan\n"); + exit(1); + } + + // setup printout and display + const double hvRange[2] = {-10., 1700.}; + const double lvRange[3] = {-1., 4., 8.}; // min, max FeeAnalog/FeeDigital, max Solar + const std::string hvFormat = "%7.2f"; + const std::string lvFormat = "%4.2f"; + gStyle->SetPalette(kVisibleSpectrum); + + // setup algorithms searching for HV issues + conf::ConfigurableParam::updateFromString(config); + conf::ConfigurableParam::setValue("MCHStatusMap.hvMinDuration", std::to_string(minDuration)); + conf::ConfigurableParam::setValue("MCHStatusMap.timeMargin", "0"); // must be 0 to compare O2 with this scan + + // determine what is scanned + std::string path{}; + bool scanHV = false; + bool scanAll = false; + std::set aliases{}; + scanWhat(what, path, scanHV, scanAll, aliases); + + ccdb::CcdbApi api; + api.init("http://alice-ccdb.cern.ch"); + + // get the SOR/EOR of every runs from the list, ordered in run number + auto runBoundaries = getRunBoundaries(api, runList); + if (printLevel > 0) { + printRunBoundaries(runBoundaries); + } + checkRunBoundaries(runBoundaries); + + // extract the time boundaries for each HV/LV file in the full time range + auto dpBoundaries = getDPBoundaries(api, path.c_str(), runBoundaries.begin()->second.first, + runBoundaries.rbegin()->second.second); + if (printLevel > 0) { + printDPBoundaries(dpBoundaries, scanHV); + } + checkDPBoundaries(dpBoundaries, scanHV, runBoundaries.begin()->second.first, + runBoundaries.rbegin()->second.second); + + // loop over the HV/LV files, fill the lists of data points per chamber and find issues using O2 algorithm + DPMAP2 dpsMapsPerCh[10]; + mch::HVStatusCreator hvStatusCreator{}; + ISSUEMAP o2issuesPerCh[10]; + std::map metadata; + for (auto boundaries : 
dpBoundaries) { + + auto* dpMap = api.retrieveFromTFileAny(path.c_str(), metadata, boundaries.first); + + // fill the lists of data points per chamber for requested aliases + for (const auto& [dpid, dps] : *dpMap) { + std::string alias(dpid.get_alias()); + if (!mch::dcs::isValid(alias)) { + printf("error: invalid DCS alias: %s\n", alias.c_str()); + exit(1); + } + if ((scanAll || containsAKey(alias, aliases)) && (!scanHV || alias.find(".iMon") == alias.npos)) { + int chamber = mch::dcs::toInt(mch::dcs::aliasToChamber(alias)); + fillDataPoints(dps, dpsMapsPerCh[chamber][alias], boundaries.first, boundaries.second, warningLevel); + } + } + + // find issues for requested aliases using O2 algorithm (only for HV) + if (scanHV) { + hvStatusCreator.findBadHVs(*dpMap); + for (const auto& [alias, issues] : hvStatusCreator.getBadHVs()) { + if (scanAll || containsAKey(alias, aliases)) { + int chamber = mch::dcs::toInt(mch::dcs::aliasToChamber(alias)); + fillO2Issues(issues, o2issuesPerCh[chamber][alias], boundaries.first, boundaries.second); + } + } + } + } + if (printLevel > 1) { + printf("\nall data points:"); + printDataPoints(dpsMapsPerCh, scanHV ? hvFormat : lvFormat, printLevel > 2); + } + + // select the data points in the time range + selectDataPoints(dpsMapsPerCh, runBoundaries.begin()->second.first, runBoundaries.rbegin()->second.second); + if (printLevel > 1) { + printf("\ndata points in the time range covered by runs:"); + printDataPoints(dpsMapsPerCh, scanHV ? hvFormat : lvFormat, printLevel > 2); + } + + // create and fill the graphs, and find HV/LV issues + ISSUEMAP issuesPerCh[10]; + TMultiGraph* mg[10]; + std::set limits; + for (int ch = 0; ch < 10; ++ch) { + mg[ch] = new TMultiGraph; + mg[ch]->SetNameTitle(fmt::format("ch{}", ch + 1).c_str(), + fmt::format("chamber {};time;{} (V)", ch + 1, scanHV ? "HV" : "LV").c_str()); + for (const auto& [alias, dps] : dpsMapsPerCh[ch]) { + mg[ch]->Add(mapToGraph(alias, dps), "lp"); + auto limit = scanHV ? 
mch::StatusMapCreatorParam::Instance().hvLimits[ch] : getLVLimit(alias); + limits.emplace(limit); + findIssues(dps, limit, issuesPerCh[ch][alias]); + } + } + + // select HV/LV issues of a minimum duration (ms) occurring during runs + selectIssues(issuesPerCh, runBoundaries, minDuration); + selectO2Issues(o2issuesPerCh, runBoundaries); + printIssues(issuesPerCh, o2issuesPerCh, scanHV, scanHV ? hvFormat : lvFormat); + + // display + TCanvas* c[10]; + for (int ch = 0; ch < 10; ++ch) { + if (scanHV) { + c[ch] = drawDataPoints(mg[ch], hvRange[0], hvRange[1]); + drawLimit(mch::StatusMapCreatorParam::Instance().hvLimits[ch], c[ch]); + } else { + auto lvMax = (what.find("LV") != what.npos || what.find("Sol") != what.npos) ? lvRange[2] : lvRange[1]; + c[ch] = drawDataPoints(mg[ch], lvRange[0], lvMax); + for (auto limit : limits) { + drawLimit(limit, c[ch]); + } + } + drawRunBoudaries(runBoundaries, c[ch]); + } + + // save display + TFile dataFile(outFileName.c_str(), "recreate"); + for (int ch = 0; ch < 10; ++ch) { + c[ch]->Write(); + } + dataFile.Close(); + + return 0; +} From 8e0c5cf67c58f931d053d5dd214db1eaaa401110 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 21 Feb 2025 13:57:44 +0100 Subject: [PATCH 0006/1764] DPL Analysis: add test for identification concepts --- Framework/Core/CMakeLists.txt | 1 + Framework/Core/include/Framework/ASoA.h | 7 +- Framework/Core/test/test_Concepts.cxx | 164 ++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 Framework/Core/test/test_Concepts.cxx diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index c006a4135557b..7202e2299b7cc 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -200,6 +200,7 @@ add_executable(o2-test-framework-core test/test_CompletionPolicy.cxx test/test_ComputingResourceHelpers.cxx test/test_ComputingQuotaEvaluator.cxx + test/test_Concepts.cxx test/test_ControlServiceHelpers.cxx test/test_ConfigParamStore.cxx 
test/test_ConfigParamRegistry.cxx diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index cb2ff11a8e901..f21decd0d5c45 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -175,6 +175,8 @@ consteval auto intersectOriginals() namespace o2::soa { +struct Binding; + template concept not_void = requires { !std::same_as; }; @@ -192,7 +194,10 @@ template concept is_self_index_column = not_void && std::same_as; template -concept is_index_column = !is_self_index_column && (requires { &C::getId; } || requires { &C::getIds; }); +concept is_index_column = !is_self_index_column && requires(C c, o2::soa::Binding b) { + { c.setCurrentRaw(b) } -> std::same_as; + requires std::same_as; +}; template using is_external_index_t = typename std::conditional_t, std::true_type, std::false_type>; diff --git a/Framework/Core/test/test_Concepts.cxx b/Framework/Core/test/test_Concepts.cxx new file mode 100644 index 0000000000000..00ad931828b44 --- /dev/null +++ b/Framework/Core/test/test_Concepts.cxx @@ -0,0 +1,164 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include +#include "Framework/ASoA.h" +#include "Framework/AnalysisDataModel.h" +#include "Framework/Expressions.h" +#include "Framework/AnalysisHelpers.h" +#include "Framework/AnalysisTask.h" +#include "Framework/Condition.h" +#include "SimulationDataFormat/O2DatabasePDG.h" + +#include + +using namespace o2::framework; +using namespace o2::soa; +using namespace o2; + +struct P { + void process1(aod::Collisions const&) + { + } + + PROCESS_SWITCH(P, process1, "", true); +}; + +TEST_CASE("IdentificationConcepts") +{ + // ASoA + int i; + REQUIRE(not_void); + + REQUIRE(is_persistent_column); + + REQUIRE(is_self_index_column); + + REQUIRE(!is_index_column); + REQUIRE(is_index_column); + REQUIRE(is_index_column); + + REQUIRE(o2::aod::is_aod_hash>); + REQUIRE(o2::aod::is_origin_hash>); + + REQUIRE(has_parent_t); + + REQUIRE(is_metadata); + + REQUIRE(is_metadata_trait>>); + + REQUIRE(has_metadata>>); + + REQUIRE(has_extension>::metadata>); + + REQUIRE(is_spawnable_column); + + REQUIRE(is_indexing_column>); + + REQUIRE(is_dynamic_column>); + + REQUIRE(is_marker_column>); + + REQUIRE(is_column); + REQUIRE(is_column>); + REQUIRE(is_column>); + REQUIRE(is_column>); + + REQUIRE(is_table); + + REQUIRE(is_iterator); + + REQUIRE(with_originals); + + REQUIRE(with_sources>::metadata>); + + REQUIRE(with_base_table); + + REQUIRE(is_index_table); + + Preslice ps = o2::aod::track::collisionId; + REQUIRE(is_preslice); + + REQUIRE(has_filtered_policy::iterator>); + + REQUIRE(is_filtered_iterator::iterator>); + + REQUIRE(is_filtered_table>); + + REQUIRE(is_filtered::iterator>); + REQUIRE(is_filtered>); + + REQUIRE(is_not_filtered_table); + + REQUIRE(is_join); + + auto tl = []() -> SmallGroups { return {std::vector>{}, SelectionVector{}, 0}; }; + REQUIRE(is_smallgroups); + + // AnalysisHelpers + REQUIRE(is_producable); + + Produces prod; + REQUIRE(is_produces); + + struct : ProducesGroup { + Produces p; + } prodg; + REQUIRE(is_produces_group); + + REQUIRE(is_spawnable); + + Spawns spw; + 
REQUIRE(is_spawns); + + Builds bld; + REQUIRE(is_builds); + + OutputObj oo{"test"}; + REQUIRE(is_outputobj); + + Service srv; + REQUIRE(is_service); + + Partition part = o2::aod::track::collisionId >= 0; + REQUIRE(is_partition); + + // AnalysisTask + Enumeration<0, 1> en; + REQUIRE(is_enumeration); + + // Condition + Condition c{""}; + REQUIRE(is_condition); + + struct : ConditionGroup { + Condition c{""}; + } cg; + REQUIRE(is_condition_group); + + // Configurable + Configurable cc{"", 1, ""}; + REQUIRE(is_configurable); + + ConfigurableAxis ca{"", {0, 1, 2, 3}, ""}; + REQUIRE(is_configurable_axis); + + REQUIRE(is_process_configurable); + + struct : ConfigurableGroup { + Configurable c{"", 1, ""}; + } ccg; + REQUIRE(is_configurable_group); + + // Expressions + expressions::Filter f = o2::aod::track::pt > 1.0f; + REQUIRE(expressions::is_filter); +} From 47ce5171abaa392163d944cd0423cc3d1fbe2144 Mon Sep 17 00:00:00 2001 From: wiechula Date: Fri, 7 Mar 2025 13:25:15 +0100 Subject: [PATCH 0007/1764] GPU: remove unnecessary check --- GPU/GPUTracking/Global/GPUChainTracking.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index a63886b93ccf9..0e1cde343135e 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -298,8 +298,8 @@ bool GPUChainTracking::ValidateSettings() GPUError("Invalid tpcCompressionGatherMode for compression on CPU"); return false; } - if (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding || GetProcessingSettings().delayedOutput || GetProcessingSettings().runMC)) { - GPUError("tpcApplyClusterFilterOnCPU cannot be used with GPU clusterization or with delayedOutput for GPU or with MC labels"); + if (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding 
|| GetProcessingSettings().runMC)) { + GPUError("tpcApplyClusterFilterOnCPU cannot be used with GPU clusterization or with MC labels"); return false; } if (GetRecoSteps() & RecoStep::TRDTracking) { From b0476bbaaa58005dbd0f4c136c06dd8f0a397e85 Mon Sep 17 00:00:00 2001 From: Robert Muenzer Date: Wed, 5 Mar 2025 13:33:00 +0100 Subject: [PATCH 0008/1764] Decrease SAC writeout rate by 10 --- prodtests/full-system-test/aggregator-workflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prodtests/full-system-test/aggregator-workflow.sh b/prodtests/full-system-test/aggregator-workflow.sh index 23336cafffab8..90abe9786f89e 100755 --- a/prodtests/full-system-test/aggregator-workflow.sh +++ b/prodtests/full-system-test/aggregator-workflow.sh @@ -298,7 +298,7 @@ crus="0-359" # to be used with $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC or ALL lanesFactorize=${O2_TPC_IDC_FACTORIZE_NLANES:-12} threadFactorize=${O2_TPC_IDC_FACTORIZE_NTHREADS:-16} nTFs=$((1000 * 128 / ${NHBPERTF})) -nTFs_SAC=$((1000 * 128 / ${NHBPERTF})) +nTFs_SAC=$((10000 * 128 / ${NHBPERTF})) nBuffer=$((100 * 128 / ${NHBPERTF})) IDC_DELTA="--disable-IDCDelta true" # off by default # deltas are on by default; you need to request explicitly to switch them off; From adea3ba09468dfddb1f579e2172d3885bc97b92f Mon Sep 17 00:00:00 2001 From: Andreas Molander Date: Tue, 18 Feb 2025 14:16:31 +0200 Subject: [PATCH 0009/1764] Update CODEOWNERS for FIT Sahil is the new FIT software coordinator. Maciej and Arvind are no longer in the team. 
--- CODEOWNERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 92999185d6f31..3f6f4a9e42600 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -29,7 +29,7 @@ /DataFormats/Detectors/CPV @peressounko @kharlov /DataFormats/Detectors/CTP @lietava /DataFormats/Detectors/EMCAL @mfasDa @jokonig -/DataFormats/Detectors/FIT @jotwinow @afurs @andreasmolander @arvindkhuntia @mslupeck +/DataFormats/Detectors/FIT @jotwinow @afurs @andreasmolander @sahilupadhyaya92 /DataFormats/Detectors/FOCAL @maxrauch @mfasDa @iarsene @matthiasrichter /DataFormats/Detectors/GlobalTracking @shahor02 /DataFormats/Detectors/GlobalTrackingWorkflow @shahor02 @@ -59,7 +59,7 @@ /Detectors/Calibration @chiarazampolli @shahor02 /Detectors/CPV @peressounko @kharlov /Detectors/EMCAL @mfasDa @jokonig -/Detectors/FIT @jotwinow @afurs @andreasmolander @arvindkhuntia @mslupeck +/Detectors/FIT @jotwinow @afurs @andreasmolander @sahilupadhyaya92 /Detectors/FOCAL @maxrauch @mfasDa @iarsene @matthiasrichter /Detectors/Geometry @sawenzel @shahor02 /Detectors/GlobalTracking @shahor02 From 699feecb5261ff63bee7358c53f3505fd789c363 Mon Sep 17 00:00:00 2001 From: pillot Date: Fri, 7 Mar 2025 11:48:32 +0100 Subject: [PATCH 0010/1764] add protection against missing object --- .../MCH/Conditions/src/bad-channels-ccdb.cxx | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx index d355a209329ca..04614d8ccd34d 100644 --- a/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx @@ -100,7 +100,12 @@ std::set listTSWhenBadChannelsChange(const std::string ccdbUrl, const std::string currentETag{}; for (auto itTS = tsChanges.begin(); itTS != tsChanges.end();) { auto headers = api.retrieveHeaders(source, metadata, *itTS); - if (headers["ETag"] == currentETag) { + if 
(headers["ETag"].empty()) { + std::cout << "- Warning: missing file" << std::endl; + auto validUntil = (std::next(itTS) != tsChanges.end()) ? *std::next(itTS) : endTimestamp; + std::cout << fmt::format(" validity range: {} - {}\n", *itTS, validUntil); + ++itTS; + } else if (headers["ETag"] == currentETag) { itTS = tsChanges.erase(itTS); } else { if (verbose) { @@ -129,6 +134,19 @@ BadChannelsVector queryBadChannels(const std::string ccdbUrl, std::map metadata; auto source = ccdbPath(badChannelType); auto* badChannels = api.retrieveFromTFileAny(source, metadata, timestamp); + if (badChannels == nullptr) { + std::cout << "do you want to proceed anyway? [y/n] "; + std::string response{}; + while (true) { + std::cin >> response; + if (response == "y") { + std::cout << "number of bad channels = 0" << std::endl; + return {}; + } else if (response == "n") { + exit(3); + } + } + } std::cout << "number of bad channels = " << badChannels->size() << std::endl; if (verbose) { for (const auto& badChannel : *badChannels) { From f34afc2da0528888b44d015a0306ef47d62e41b2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 7 Mar 2025 17:21:54 +0100 Subject: [PATCH 0011/1764] GPU: Bump required LLVM version for OpenCL --- dependencies/FindO2GPU.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index c5d53d6359ada..57c820fbe86b1 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -175,7 +175,7 @@ if(ENABLE_OPENCL) if(Clang_FOUND AND LLVM_FOUND AND NOT LLVM_CLANG STREQUAL "LLVM_CLANG-NOTFOUND" - AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13.0) + AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 18.0) set(OPENCL_COMPATIBLE_CLANG_FOUND ON) endif() if(OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2 From 9e2d4c5f14311d41e80325fc373fad916b66f3c4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 7 Mar 2025 17:22:01 +0100 Subject: [PATCH 0012/1764] GPU: Simplify and cleanup code 
--- .../Global/GPUChainTrackingSectorTracker.cxx | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index df7c513fc1120..dd7fe285265ad 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -93,6 +93,8 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } bool streamInit[GPUCA_MAX_STREAMS] = {false}; + int32_t streamInitAndOccMap = mRec->NStreams() - 1; + if (doGPU) { for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); @@ -113,18 +115,12 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() return 2; } - WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); + WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, streamInitAndOccMap, &mEvents->init); - for (int32_t i = 0; i < mRec->NStreams() - 1; i++) { - streamInit[i] = false; - } - streamInit[mRec->NStreams() - 1] = true; - } - if (GPUDebug("Initialization (1)", 0)) { - return (2); + std::fill(streamInit, streamInit + mRec->NStreams(), false); + streamInit[streamInitAndOccMap] = true; } - int32_t streamOccMap = mRec->NStreams() - 1; if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]); } @@ -134,21 +130,21 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } uint32_t* ptr = doGPU ? 
mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap; auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU); - runKernel(GetGridAutoStep(streamOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); - runKernel(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamOccMap), ptrTmp); - runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), ptrTmp, ptr + 2); + runKernel(GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); + runKernel(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamInitAndOccMap), ptrTmp); + runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamInitAndOccMap), ptrTmp, ptr + 2); mRec->ReturnVolatileMemory(); mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage; if (doGPU) { - GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init); + GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamInitAndOccMap, false, &mEvents->init); } else { - TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init); + TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamInitAndOccMap, &mEvents->init); } } if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; occupancyTotal = 
CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); - mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamOccMap); + mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); } int32_t streamMap[NSECTORS]; @@ -190,19 +186,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } } - // Initialize temporary memory where needed - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Copying Sector Data to GPU and initializing temporary memory"); - } runKernel(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - - if (!doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); // Copy Data to GPU Global Memory - } - if (GPUDebug("Initialization (3)", useStream)) { - throw std::runtime_error("memcpy failure"); - } - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); streamInit[useStream] = true; From 5756ea1683c23dddb348ef70ac307812d47be548 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 17 Feb 2025 09:23:58 +0100 Subject: [PATCH 0013/1764] DPL: cleanup state switching - Remove duplicate helper - Add signposts to mark streaming states transitions - Notify driver --- Framework/Core/src/DataProcessingDevice.cxx | 44 ++++++++++----------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index 8a3fbbcf5b2f1..e8676995772e6 100644 --- a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -141,6 +141,17 @@ void on_transition_requested_expired(uv_timer_t* handle) state.transitionHandling = TransitionHandlingState::Expired; } +auto switchState(ServiceRegistryRef& ref, StreamingState newState) -> void +{ + auto& state = ref.get(); + auto& context = ref.get(); + O2_SIGNPOST_ID_FROM_POINTER(dpid, device, &context); + O2_SIGNPOST_END(device, dpid, "state", "End of processing state %d", (int)state.streaming); + O2_SIGNPOST_START(device, dpid, "state", "Starting processing state %d", (int)newState); + state.streaming = newState; + ref.get().notifyStreamingState(state.streaming); +}; + void on_data_processing_expired(uv_timer_t* handle) { auto* ref = (ServiceRegistryRef*)handle->data; @@ -1236,7 +1247,7 @@ void DataProcessingDevice::PreRun() O2_SIGNPOST_ID_FROM_POINTER(cid, device, state.loop); O2_SIGNPOST_START(device, cid, "PreRun", "Entering PreRun callback."); state.quitRequested = false; - state.streaming = StreamingState::Streaming; + switchState(ref, StreamingState::Streaming); state.allowedProcessing = DeviceState::Any; for (auto& info : state.inputChannelInfos) { if (info.state != InputChannelState::Pull) { @@ -1365,10 +1376,10 @@ void DataProcessingDevice::Run() // Check if we only have timers auto& spec = 
ref.get(); if (hasOnlyTimers(spec)) { - state.streaming = StreamingState::EndOfStreaming; + switchState(ref, StreamingState::EndOfStreaming); } - // If this is a source device, dataTransitionTimeout and dataProcessingTimeout are effectively + // If this is a source device, exitTransitionTimeout and dataProcessingTimeout are effectively // the same (because source devices are not allowed to produce any calibration). // should be the same. if (hasOnlyGenerated(spec) && deviceContext.dataProcessingTimeout > 0) { @@ -1385,7 +1396,8 @@ void DataProcessingDevice::Run() state.transitionHandling = TransitionHandlingState::Requested; ref.get().call(ServiceRegistryRef{ref}); uv_update_time(state.loop); - O2_SIGNPOST_EVENT_EMIT(calibration, lid, "timer_setup", "Starting %d s timer for exitTransitionTimeout.", deviceContext.exitTransitionTimeout); + O2_SIGNPOST_EVENT_EMIT(calibration, lid, "timer_setup", "Starting %d s timer for exitTransitionTimeout.", + deviceContext.exitTransitionTimeout); uv_timer_start(deviceContext.gracePeriodTimer, on_transition_requested_expired, deviceContext.exitTransitionTimeout * 1000, 0); if (mProcessingPolicies.termination == TerminationPolicy::QUIT) { O2_SIGNPOST_EVENT_EMIT_INFO(device, lid, "run_loop", "New state requested. 
Waiting for %d seconds before quitting.", (int)deviceContext.exitTransitionTimeout); @@ -1728,15 +1740,6 @@ void DataProcessingDevice::doRun(ServiceRegistryRef ref) { auto& context = ref.get(); O2_SIGNPOST_ID_FROM_POINTER(dpid, device, &context); - auto switchState = [ref](StreamingState newState) { - auto& state = ref.get(); - auto& context = ref.get(); - O2_SIGNPOST_ID_FROM_POINTER(dpid, device, &context); - O2_SIGNPOST_END(device, dpid, "state", "End of processing state %d", (int)state.streaming); - O2_SIGNPOST_START(device, dpid, "state", "Starting processing state %d", (int)newState); - state.streaming = newState; - ref.get().notifyStreamingState(state.streaming); - }; auto& state = ref.get(); auto& spec = ref.get(); @@ -1772,7 +1775,7 @@ void DataProcessingDevice::doRun(ServiceRegistryRef ref) // dependent on the callback, not something which is controlled by the // framework itself. if (context.allDone == true && state.streaming == StreamingState::Streaming) { - switchState(StreamingState::EndOfStreaming); + switchState(ref, StreamingState::EndOfStreaming); state.lastActiveDataProcessor = &context; } @@ -1818,7 +1821,7 @@ void DataProcessingDevice::doRun(ServiceRegistryRef ref) } // This is needed because the transport is deleted before the device. 
relayer.clear(); - switchState(StreamingState::Idle); + switchState(ref, StreamingState::Idle); // In case we should process, note the data processor responsible for it if (shouldProcess) { state.lastActiveDataProcessor = &context; @@ -2328,13 +2331,6 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v } }; - auto switchState = [ref](StreamingState newState) { - auto& control = ref.get(); - auto& state = ref.get(); - state.streaming = newState; - control.notifyStreamingState(state.streaming); - }; - ref.get().getReadyToProcess(completed); if (completed.empty() == true) { LOGP(debug, "No computations available for dispatching."); @@ -2510,7 +2506,7 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v O2_SIGNPOST_EVENT_EMIT(device, pcid, "device", "Skipping processing because we are discarding."); } else { O2_SIGNPOST_EVENT_EMIT(device, pcid, "device", "No processing callback provided. Switching to %{public}s.", "Idle"); - state.streaming = StreamingState::Idle; + switchState(ref, StreamingState::Idle); } if (shouldProcess(action)) { auto& timingInfo = ref.get(); @@ -2598,7 +2594,7 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v for (auto& channel : spec.outputChannels) { DataProcessingHelpers::sendEndOfStream(ref, channel); } - switchState(StreamingState::Idle); + switchState(ref, StreamingState::Idle); } return true; From 80faf100ebd930094d01ea63e0ffdbd3d64b2c67 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 17 Feb 2025 09:28:46 +0100 Subject: [PATCH 0014/1764] DPL: correctly handle data-processing-timeouts in sources --- Framework/Core/src/DataProcessingDevice.cxx | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index e8676995772e6..7f42805cfdb1e 100644 --- 
a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -156,16 +156,19 @@ void on_data_processing_expired(uv_timer_t* handle) { auto* ref = (ServiceRegistryRef*)handle->data; auto& state = ref->get(); + auto& spec = ref->get(); state.loopReason |= DeviceState::TIMER_EXPIRED; // Check if this is a source device O2_SIGNPOST_ID_FROM_POINTER(cid, device, handle); - // Source devices should never end up in this callback, since the exitTransitionTimeout should - // be reset to the dataProcessingTimeout and the timers cohalesced. - assert(hasOnlyGenerated(ref->get()) == false); - O2_SIGNPOST_EVENT_EMIT_INFO(calibration, cid, "callback", "Grace period for data processing expired. Only calibrations from this point onwards."); - state.allowedProcessing = DeviceState::CalibrationOnly; + if (hasOnlyGenerated(spec)) { + O2_SIGNPOST_EVENT_EMIT_INFO(calibration, cid, "callback", "Grace period for data processing expired. Switching to EndOfStreaming."); + switchState(*ref, StreamingState::EndOfStreaming); + } else { + O2_SIGNPOST_EVENT_EMIT_INFO(calibration, cid, "callback", "Grace period for data processing expired. Only calibrations from this point onwards."); + state.allowedProcessing = DeviceState::CalibrationOnly; + } } void on_communication_requested(uv_async_t* s) @@ -1379,13 +1382,6 @@ void DataProcessingDevice::Run() switchState(ref, StreamingState::EndOfStreaming); } - // If this is a source device, exitTransitionTimeout and dataProcessingTimeout are effectively - // the same (because source devices are not allowed to produce any calibration). - // should be the same. 
- if (hasOnlyGenerated(spec) && deviceContext.dataProcessingTimeout > 0) { - deviceContext.exitTransitionTimeout = deviceContext.dataProcessingTimeout; - } - // We do not do anything in particular if the data processing timeout would go past the exitTransitionTimeout if (deviceContext.dataProcessingTimeout > 0 && deviceContext.dataProcessingTimeout < deviceContext.exitTransitionTimeout) { uv_update_time(state.loop); From 3961e74342b312adddee96dcdfa2cf8eb8928018 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 17 Feb 2025 10:36:24 +0100 Subject: [PATCH 0015/1764] DPL: drop messages explicitly Somehow if they remain in the queue they confuse the oldest possible timeframe mechanism. --- Framework/Core/src/DataRelayer.cxx | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/DataRelayer.cxx b/Framework/Core/src/DataRelayer.cxx index c2ae459aace38..385d9a6c50c4a 100644 --- a/Framework/Core/src/DataRelayer.cxx +++ b/Framework/Core/src/DataRelayer.cxx @@ -17,6 +17,7 @@ #include "Framework/DataDescriptorMatcher.h" #include "Framework/DataSpecUtils.h" #include "Framework/DataProcessingHeader.h" +#include "Framework/DataProcessingContext.h" #include "Framework/DataRef.h" #include "Framework/InputRecord.h" #include "Framework/InputSpan.h" @@ -46,7 +47,6 @@ #include #include #include -#include #include using namespace o2::framework::data_matcher; @@ -55,6 +55,8 @@ using DataProcessingHeader = o2::framework::DataProcessingHeader; using Verbosity = o2::monitoring::Verbosity; O2_DECLARE_DYNAMIC_LOG(data_relayer); +// Stream which keeps track of the calibration lifetime logic +O2_DECLARE_DYNAMIC_LOG(calibration); namespace o2::framework { @@ -480,6 +482,13 @@ DataRelayer::RelayChoice // We are in calibration mode and the data does not have the calibration bit set. // We do not store it. 
if (services.get().allowedProcessing == DeviceState::ProcessingType::CalibrationOnly && !isCalibrationData(messages[mi])) { + O2_SIGNPOST_ID_FROM_POINTER(cid, calibration, &services.get()); + O2_SIGNPOST_EVENT_EMIT(calibration, cid, "calibration", + "Dropping incoming %zu messages because they are data processing.", nPayloads); + // Actually dropping messages. + for (size_t i = mi; i < mi + nPayloads + 1; i++) { + auto discard = std::move(messages[i]); + } mi += nPayloads; continue; } From 0cdfe91a3e879d32ab246e83556ed852d84911cb Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sun, 9 Mar 2025 16:11:47 +0100 Subject: [PATCH 0016/1764] DPL: add dumping of memory profile in strategic location --- .../AnalysisSupport/src/DataInputDirector.cxx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index 981ca5254980d..2c51360cd9923 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -15,6 +15,7 @@ #include "Framework/RootArrowFilesystem.h" #include "Framework/AnalysisDataModelHelpers.h" #include "Framework/Output.h" +#include "Framework/Signpost.h" #include "Headers/DataHeader.h" #include "Framework/TableTreeHelpers.h" #include "Monitoring/Tags.h" @@ -41,6 +42,9 @@ #include #endif +#include +O2_DECLARE_DYNAMIC_LOG(reader_memory_dump); + namespace o2::framework { using namespace rapidjson; @@ -458,6 +462,17 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh mIOTime += (uv_hrtime() - ioStart); + O2_SIGNPOST_ACTION(reader_memory_dump, [](void*) { + void (*dump_)(const char*); + if (void* sym = dlsym(nullptr, "igprof_dump_now")) { + dump_ = __extension__(void (*)(const char*)) sym; + if (dump_) { + std::string filename = fmt::format("reader-memory-dump-{}.gz", uv_hrtime()); + dump_(filename.c_str()); + } + } + 
}); + return true; } From f6c7f18db2818535564090df92cbd0a84828f892 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 12 Feb 2025 16:31:38 +0100 Subject: [PATCH 0017/1764] Remove CodeCov badge --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 3dc6061a39351..8715613a0eb5c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ -[![codecov](https://codecov.io/gh/AliceO2Group/AliceO2/branch/dev/graph/badge.svg)](https://codecov.io/gh/AliceO2Group/AliceO2/branches/dev) [![JIRA](https://img.shields.io/badge/JIRA-Report%20issue-blue.svg)](https://alice.its.cern.ch/jira/secure/CreateIssue.jspa?pid=11201&issuetype=1) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1493334.svg)](https://doi.org/10.5281/zenodo.1493334) From 88d8400ba8398f7d06d531bbad888e25898675a1 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 12 Feb 2025 16:35:21 +0100 Subject: [PATCH 0018/1764] Fix Format --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8715613a0eb5c..5bdbc4a801e5e 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,8 @@ Rules and instructions are available in the repository ### Enable C++ compiler warnings -Currently O2 is built with minimal compiler warnings enabled. This is going to change in the near future. In the transition period, developers have to manualy enable warnings by building O2 with `ALIBUILD_O2_WARNINGS` environment variable set e.g. using the `-e` option of `alibuild` e.g: -```bash +Currently O2 is built with minimal compiler warnings enabled. This is going to change in the near future. In the transition period, developers have to manualy enable warnings by building O2 with `ALIBUILD_O2_WARNINGS` environment variable set e.g. 
using the `-e` option of `alibuild` e.g: +```bash aliBuild build --debug -e ALIBUILD_O2_WARNINGS=1 --defaults o2 O2 -``` +``` A helper script that extracts warnings from the build log skipping duplicates is available [here](https://github.com/AliceO2Group/AliceO2/blob/dev/scripts/filter-warnings.sh) From 8290f89678f78df47310aabb749f2b5157138a62 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 8 Mar 2025 20:52:27 +0100 Subject: [PATCH 0019/1764] GPU: Fix compile warning from shadowed variable, disentangle filter types --- .../Global/GPUChainTrackingCompression.cxx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 1b08de21abd0f..57a759a58924e 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -201,8 +201,8 @@ int32_t GPUChainTracking::RunTPCCompression() int32_t GPUChainTracking::RunTPCDecompression() { - const bool runFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0); - if (runFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { + const bool runFullFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0); + if (runFullFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { GPUFatal("tpcApplyCFCutsAtDecoding, tpcApplyClusterFilterOnCPU and tpcCutTimeBin currently require tpcUseOldCPUDecoding"); } @@ -219,13 +219,13 @@ int32_t GPUChainTracking::RunTPCDecompression() return ((tmpBuffer = std::make_unique(size))).get(); }; auto& decompressTimer = getTimer("TPCDecompression", 0); - auto allocatorUse = runFiltering ? std::function{allocatorTmp} : std::function{allocatorFinal}; + auto allocatorUse = runFullFiltering ? 
std::function{allocatorTmp} : std::function{allocatorFinal}; decompressTimer.Start(); if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) { GPUError("Error decompressing clusters"); return 1; } - if (runFiltering) { + if (runFullFiltering) { RunTPCClusterFilter(mClusterNativeAccess.get(), allocatorFinal, GetProcessingSettings().tpcApplyCFCutsAtDecoding); } decompressTimer.Stop(); @@ -245,7 +245,7 @@ int32_t GPUChainTracking::RunTPCDecompression() mRec->PushNonPersistentMemory(qStr2Tag("TPCDCMPR")); RecoStep myStep = RecoStep::TPCDecompression; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCDecompression; - bool runFiltering = param().tpcCutTimeBin > 0; + bool runTimeBinCutFiltering = param().tpcCutTimeBin > 0; GPUTPCDecompression& Decompressor = processors()->tpcDecompressor; GPUTPCDecompression& DecompressorShadow = doGPU ? processorsShadow()->tpcDecompressor : Decompressor; const auto& threadContext = GetThreadContext(); @@ -325,7 +325,7 @@ int32_t GPUChainTracking::RunTPCDecompression() if (decodedAttachedClusters != cmprClsHost.nAttachedClusters) { GPUWarning("%u / %u clusters failed track model decoding (%f %%)", cmprClsHost.nAttachedClusters - decodedAttachedClusters, cmprClsHost.nAttachedClusters, 100.f * (float)(cmprClsHost.nAttachedClusters - decodedAttachedClusters) / (float)cmprClsHost.nAttachedClusters); } - if (runFiltering) { // If filtering, allocate a temporary buffer and cluster native access in decompressor context + if (runTimeBinCutFiltering) { // If filtering, allocate a temporary buffer and cluster native access in decompressor context Decompressor.mNClusterNativeBeforeFiltering = DecompressorShadow.mNClusterNativeBeforeFiltering = decodedAttachedClusters + cmprClsHost.nUnattachedClusters; AllocateRegisteredMemory(Decompressor.mResourceTmpBufferBeforeFiltering); AllocateRegisteredMemory(Decompressor.mResourceClusterNativeAccess); @@ -362,13 
+362,13 @@ int32_t GPUChainTracking::RunTPCDecompression() int32_t iStream = (iSector / batchSize) % mRec->NStreams(); runKernel({GetGridAuto(iStream), krnlRunRangeNone, {nullptr, &mEvents->single}}, iSector, batchSize); uint32_t copySize = std::accumulate(mClusterNativeAccess->nClustersSector + iSector, mClusterNativeAccess->nClustersSector + iSector + batchSize, 0u); - if (!runFiltering) { + if (!runTimeBinCutFiltering) { GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSector][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSector][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false); } } SynchronizeGPU(); - if (runFiltering) { // If filtering is applied, count how many clusters will remain after filtering and allocate final buffers accordingly + if (runTimeBinCutFiltering) { // If filtering is applied, count how many clusters will remain after filtering and allocate final buffers accordingly AllocateRegisteredMemory(Decompressor.mResourceNClusterPerSectorRow); WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream); runKernel({GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression), krnlRunRangeNone}, DecompressorShadow.mNClusterPerSectorRow, NSECTORS * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNClusterPerSectorRow[0])); From dd2d2aa66ec6b004806d190876392df908473e81 Mon Sep 17 00:00:00 2001 From: TrifleMichael <61475109+TrifleMichael@users.noreply.github.com> Date: Mon, 10 Mar 2025 11:19:57 +0100 Subject: [PATCH 0020/1764] CCDBApi: Fix CCDBDownloader redirect errors (#14029) * Fixing CcdbDownloader redirects This commit addresses: - Not following available redirects after receiving 4xx http code. - Not following all redirects provided via "Location" header. - Not following redirects after failing alien:/ or file:/ retrieval. 
- Improper fail-check in CcdbApi::loadLocalContentToMemory. - The headers holding etags and content-type from multiple locations. * Removing whitespaces --- CCDB/include/CCDB/CCDBDownloader.h | 8 +++-- CCDB/src/CCDBDownloader.cxx | 53 +++++++++++++++++++++--------- CCDB/src/CcdbApi.cxx | 37 ++++++++++++++++++--- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/CCDB/include/CCDB/CCDBDownloader.h b/CCDB/include/CCDB/CCDBDownloader.h index 0bda186e308c6..6c057a537a096 100644 --- a/CCDB/include/CCDB/CCDBDownloader.h +++ b/CCDB/include/CCDB/CCDBDownloader.h @@ -47,6 +47,7 @@ struct HeaderObjectPair_t { typedef struct DownloaderRequestData { std::vector hosts; + std::vector locations; std::string path; long timestamp; HeaderObjectPair_t hoPair; @@ -231,12 +232,13 @@ class CCDBDownloader std::string prepareRedirectedURL(std::string address, std::string potentialHost) const; /** - * Returns a vector of possible content locations based on the redirect headers. + * Updates the locations vector with the the locations. * - * @param baseUrl Content path. * @param headerMap Map containing response headers. + * @param locations Location list to be updated. + * @param locIndex Index of the next locaiton to be tried. 
*/ - std::vector getLocations(std::multimap* headerMap) const; + void updateLocations(std::multimap* headerMap, std::vector* locations, int* locIndex) const; std::string mUserAgentId = "CCDBDownloader"; /** diff --git a/CCDB/src/CCDBDownloader.cxx b/CCDB/src/CCDBDownloader.cxx index 3fca3c8cc2ae6..2f033a50b36e7 100644 --- a/CCDB/src/CCDBDownloader.cxx +++ b/CCDB/src/CCDBDownloader.cxx @@ -362,7 +362,7 @@ void CCDBDownloader::tryNewHost(PerformData* performData, CURL* easy_handle) { auto requestData = performData->requestData; std::string newUrl = requestData->hosts.at(performData->hostInd) + "/" + requestData->path + "/" + std::to_string(requestData->timestamp); - LOG(debug) << "Connecting to another host " << newUrl; + LOG(debug) << "Connecting to another host " << newUrl << "\n"; requestData->hoPair.header.clear(); curl_easy_setopt(easy_handle, CURLOPT_URL, newUrl.c_str()); mHandlesToBeAdded.push_back(easy_handle); @@ -374,9 +374,11 @@ void CCDBDownloader::getLocalContent(PerformData* performData, std::string& newL LOG(debug) << "Redirecting to local content " << newLocation << "\n"; if (requestData->localContentCallback(newLocation)) { contentRetrieved = true; + LOG(debug) << "Local content retrieved succesfully: " << newLocation << " n"; } else { // Prepare next redirect url newLocation = getNewLocation(performData, locations); + LOG(debug) << "Failed to retrieve local content: " << newLocation << "\n"; } } @@ -396,7 +398,7 @@ std::string CCDBDownloader::getNewLocation(PerformData* performData, std::vector void CCDBDownloader::httpRedirect(PerformData* performData, std::string& newLocation, CURL* easy_handle) { auto requestData = performData->requestData; - LOG(debug) << "Trying content location " << newLocation; + LOG(debug) << "Trying content location " << newLocation << "\n"; curl_easy_setopt(easy_handle, CURLOPT_URL, newLocation.c_str()); mHandlesToBeAdded.push_back(easy_handle); } @@ -404,7 +406,7 @@ void CCDBDownloader::httpRedirect(PerformData* 
performData, std::string& newLoca void CCDBDownloader::followRedirect(PerformData* performData, CURL* easy_handle, std::vector& locations, bool& rescheduled, bool& contentRetrieved) { std::string newLocation = getNewLocation(performData, locations); - if (newLocation.find("alien:/", 0) != std::string::npos || newLocation.find("file:/", 0) != std::string::npos) { + while (!contentRetrieved && (newLocation.find("alien:/", 0) != std::string::npos || newLocation.find("file:/", 0) != std::string::npos)) { getLocalContent(performData, newLocation, contentRetrieved, locations); } if (!contentRetrieved && newLocation != "") { @@ -508,8 +510,8 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) std::string currentHost = requestData->hosts[performData->hostInd]; std::string loggingMessage = prepareLogMessage(currentHost, requestData->userAgent, requestData->path, requestData->timestamp, requestData->headers, httpCode); - // Get alternative locations for the same host - auto locations = getLocations(&(requestData->hoPair.header)); + // Get new locations based on received headers + updateLocations(&(requestData->hoPair.header), &requestData->locations, &performData->locInd); // React to received http code if (200 <= httpCode && httpCode < 400) { @@ -517,8 +519,8 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) if (304 == httpCode) { LOGP(debug, "Object exists but I am not serving it since it's already in your possession"); contentRetrieved = true; - } else if (300 <= httpCode && httpCode < 400 && performData->locInd < locations.size()) { - followRedirect(performData, easy_handle, locations, rescheduled, contentRetrieved); + } else if (300 <= httpCode && httpCode < 400 && performData->locInd < requestData->locations.size()) { + followRedirect(performData, easy_handle, requestData->locations, rescheduled, contentRetrieved); } else if (200 <= httpCode && httpCode < 300) { contentRetrieved = true; // Can be overruled by 
following error check } @@ -531,8 +533,16 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) contentRetrieved = false; } - // Check if content was retrieved, or scheduled to be retrieved - if (!rescheduled && !contentRetrieved && performData->locInd == locations.size()) { + // Check if content was retrieved or scheduled to be retrieved + if (!rescheduled && !contentRetrieved) { + // Current location failed without providing 3xx http code, try next redirect for the same host + if (performData->locInd < requestData->locations.size()) { + followRedirect(performData, easy_handle, requestData->locations, rescheduled, contentRetrieved); + } + } + + // Check again because content might have been retrieved or rescheduled via a redirect + if (!rescheduled && !contentRetrieved) { // Ran out of locations to redirect, try new host if (++performData->hostInd < requestData->hosts.size()) { tryNewHost(performData, easy_handle); @@ -650,24 +660,37 @@ CURLcode CCDBDownloader::perform(CURL* handle) return batchBlockingPerform(handleVector).back(); } -std::vector CCDBDownloader::getLocations(std::multimap* headerMap) const +void CCDBDownloader::updateLocations(std::multimap* headerMap, std::vector* locations, int* locIndex) const { - std::vector locs; + std::vector newLocations; + auto iter = headerMap->find("Location"); if (iter != headerMap->end()) { - locs.push_back(iter->second); + auto range = headerMap->equal_range("Location"); + for (auto it = range.first; it != range.second; ++it) { + if (std::find(locations->begin(), locations->end(), it->second) == locations->end()) { + if (std::find(newLocations.begin(), newLocations.end(), it->second) == newLocations.end()) { + newLocations.push_back(it->second); + } + } + } } + // add alternative locations (not yet included) auto iter2 = headerMap->find("Content-Location"); if (iter2 != headerMap->end()) { auto range = headerMap->equal_range("Content-Location"); for (auto it = range.first; it != range.second; 
++it) { - if (std::find(locs.begin(), locs.end(), it->second) == locs.end()) { - locs.push_back(it->second); + if (std::find(locations->begin(), locations->end(), it->second) == locations->end()) { + if (std::find(newLocations.begin(), newLocations.end(), it->second) == newLocations.end()) { + newLocations.push_back(it->second); + } } } } - return locs; + + // Insert location list at the current location index. This assures that the provided locations will be tried first. + locations->insert(locations->begin() + (*locIndex), newLocations.begin(), newLocations.end()); } std::vector CCDBDownloader::batchBlockingPerform(std::vector const& handleVector) diff --git a/CCDB/src/CcdbApi.cxx b/CCDB/src/CcdbApi.cxx index df05d393100d6..2906438211c65 100644 --- a/CCDB/src/CcdbApi.cxx +++ b/CCDB/src/CcdbApi.cxx @@ -667,6 +667,23 @@ size_t header_map_callback(char* buffer, size_t size, size_t nitems, void* userd } } } + + // Keep only the first ETag encountered + if (key == "ETag") { + auto cl = headers->find("ETag"); + if (cl != headers->end()) { + insert = false; + } + } + + // Keep only the first Content-Type encountered + if (key == "Content-Type") { + auto cl = headers->find("Content-Type"); + if (cl != headers->end()) { + insert = false; + } + } + if (insert) { headers->insert(std::make_pair(key, value)); } @@ -1971,14 +1988,26 @@ void CcdbApi::vectoredLoadFileToMemory(std::vector& requestConte bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& url) const { if (url.find("alien:/", 0) != std::string::npos) { - loadFileToMemory(dest, url, nullptr); // headers loaded from the file in case of the snapshot reading only - return true; + std::map localHeaders; + loadFileToMemory(dest, url, &localHeaders); + auto it = localHeaders.find("Error"); + if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { + return false; + } else { + return true; + } } if ((url.find("file:/", 0) != std::string::npos)) { std::string path = 
url.substr(7); if (std::filesystem::exists(path)) { - loadFileToMemory(dest, path, nullptr); - return true; + std::map localHeaders; + loadFileToMemory(dest, url, &localHeaders); + auto it = localHeaders.find("Error"); + if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { + return false; + } else { + return true; + } } } return false; From 622bcca8fa0fa2461a67462eca8f497900110e15 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 10 Mar 2025 10:12:29 +0100 Subject: [PATCH 0021/1764] DPL: drop obsolete TreeToTable code Now using the arrow::Dataset API. --- Framework/Core/CMakeLists.txt | 2 - .../Core/include/Framework/DataAllocator.h | 14 - .../Core/include/Framework/TableTreeHelpers.h | 54 ---- Framework/Core/src/DataAllocator.cxx | 32 --- Framework/Core/src/TableTreeHelpers.cxx | 272 ------------------ Framework/Core/test/benchmark_TreeToTable.cxx | 96 ------- Framework/Core/test/test_TreeToTable.cxx | 237 --------------- 7 files changed, 707 deletions(-) delete mode 100644 Framework/Core/test/benchmark_TreeToTable.cxx delete mode 100644 Framework/Core/test/test_TreeToTable.cxx diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 7202e2299b7cc..f059984b5d85d 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -250,7 +250,6 @@ add_executable(o2-test-framework-core test/test_Variants.cxx test/test_WorkflowHelpers.cxx test/test_WorkflowSerialization.cxx - test/test_TreeToTable.cxx test/test_DataOutputDirector.cxx test/unittest_SimpleOptionsRetriever.cxx test/unittest_DataSpecUtils.cxx @@ -348,7 +347,6 @@ foreach(b EventMixing HistogramRegistry TableToTree - TreeToTable ExternalFairMQDeviceProxies ) o2_add_executable(benchmark-${b} diff --git a/Framework/Core/include/Framework/DataAllocator.h b/Framework/Core/include/Framework/DataAllocator.h index eb63b5469bb29..287513ec85845 100644 --- 
a/Framework/Core/include/Framework/DataAllocator.h +++ b/Framework/Core/include/Framework/DataAllocator.h @@ -233,15 +233,6 @@ class DataAllocator return tb; } - template - requires(requires { static_cast(std::declval>()); }) - decltype(auto) make(const Output& spec, Args... args) - { - auto t2t = std::move(LifetimeHolder(new std::decay_t(args...))); - adopt(spec, t2t); - return t2t; - } - template requires(requires { static_cast(std::declval>()); }) decltype(auto) make(const Output& spec, Args... args) @@ -288,11 +279,6 @@ class DataAllocator void adopt(const Output& spec, LifetimeHolder&); - /// Adopt a Tree2Table in the framework and serialise / send - /// it as an Arrow table to all consumers of @a spec once done - void - adopt(const Output& spec, LifetimeHolder&); - /// Adopt a Source2Batch in the framework and serialise / send /// it as an Arrow Dataset to all consumers of @a spec once done void diff --git a/Framework/Core/include/Framework/TableTreeHelpers.h b/Framework/Core/include/Framework/TableTreeHelpers.h index 92725d186ee33..3f76298a5bbd4 100644 --- a/Framework/Core/include/Framework/TableTreeHelpers.h +++ b/Framework/Core/include/Framework/TableTreeHelpers.h @@ -36,19 +36,6 @@ namespace o2::framework // OR t2t.addBranch(column.get(), field.get()), ...; // . t2t.process(); // -// ............................................................................. -// ----------------------------------------------------------------------------- -// TreeToTable allows to fill the contents of a given TTree to an arrow::Table -// ColumnIterator is used by TreeToTable -// -// To copy the contents of a tree tr to a table ta do: -// . TreeToTable t2t(tr); -// . t2t.addColumn(columnname1); t2t.addColumn(columnname2); ... -// OR -// t2t.addAllColumns(); -// . auto ta = t2t.process(); -// -// ............................................................................. 
struct ROOTTypeInfo { EDataType type; char suffix[3]; @@ -58,29 +45,6 @@ struct ROOTTypeInfo { auto arrowTypeFromROOT(EDataType type, int size); auto basicROOTTypeFromArrow(arrow::Type::type id); -class BranchToColumn -{ - public: - BranchToColumn(TBranch* branch, bool VLA, std::string name, EDataType type, int listSize, arrow::MemoryPool* pool); - // BranchToColumn(TBranch* branch, TBranch* sizeBranch, std::string name, EDataType type, arrow::MemoryPool* pool); - ~BranchToColumn() = default; - TBranch* branch(); - - std::pair, std::shared_ptr> read(TBuffer* buffer); - - private: - TBranch* mBranch = nullptr; - bool mVLA = false; - std::string mColumnName; - EDataType mType; - std::shared_ptr mArrowType; - arrow::ArrayBuilder* mValueBuilder = nullptr; - std::unique_ptr mListBuilder = nullptr; - int mListSize = 1; - std::unique_ptr mBuilder = nullptr; - arrow::MemoryPool* mPool = nullptr; -}; - class ColumnToBranch { public: @@ -127,24 +91,6 @@ class TableToTree std::vector> mColumnReaders; }; -class TreeToTable -{ - public: - TreeToTable(arrow::MemoryPool* pool = arrow::default_memory_pool()); - void setLabel(const char* label); - void addAllColumns(TTree* tree, std::vector&& names = {}); - void fill(TTree*); - std::shared_ptr finalize(); - - private: - arrow::MemoryPool* mArrowMemoryPool; - std::vector> mBranchReaders; - std::string mTableLabel; - std::shared_ptr mTable; - - void addReader(TBranch* branch, std::string const& name, bool VLA); -}; - class FragmentToBatch { public: diff --git a/Framework/Core/src/DataAllocator.cxx b/Framework/Core/src/DataAllocator.cxx index b735eee1f3308..ca35089fdfaab 100644 --- a/Framework/Core/src/DataAllocator.cxx +++ b/Framework/Core/src/DataAllocator.cxx @@ -241,38 +241,6 @@ void DataAllocator::adopt(const Output& spec, LifetimeHolder& tb) context.addBuffer(std::move(header), buffer, std::move(finalizer), routeIndex); } -void DataAllocator::adopt(const Output& spec, LifetimeHolder& t2t) -{ - auto& timingInfo = mRegistry.get(); 
- RouteIndex routeIndex = matchDataHeader(spec, timingInfo.timeslice); - - auto header = headerMessageFromOutput(spec, routeIndex, o2::header::gSerializationMethodArrow, 0); - auto& context = mRegistry.get(); - - auto creator = [transport = context.proxy().getOutputTransport(routeIndex)](size_t s) -> std::unique_ptr { - return transport->CreateMessage(s); - }; - auto buffer = std::make_shared(creator); - - t2t.callback = [buffer = buffer, transport = context.proxy().getOutputTransport(routeIndex)](TreeToTable& tree) { - // Serialization happens in here, so that we can - // get rid of the intermediate tree 2 table object, saving memory. - auto table = tree.finalize(); - doWriteTable(buffer, table.get()); - // deletion happens in the caller - }; - - /// To finalise this we write the table to the buffer. - /// FIXME: most likely not a great idea. We should probably write to the buffer - /// directly in the TableBuilder, incrementally. - auto finalizer = [](std::shared_ptr b) -> void { - // This is empty because we already serialised the object when - // the LifetimeHolder goes out of scope. 
- }; - - context.addBuffer(std::move(header), buffer, std::move(finalizer), routeIndex); -} - void DataAllocator::adopt(const Output& spec, LifetimeHolder& f2b) { auto& timingInfo = mRegistry.get(); diff --git a/Framework/Core/src/TableTreeHelpers.cxx b/Framework/Core/src/TableTreeHelpers.cxx index 84d4ff171bc39..92231cb9ce069 100644 --- a/Framework/Core/src/TableTreeHelpers.cxx +++ b/Framework/Core/src/TableTreeHelpers.cxx @@ -102,166 +102,6 @@ auto basicROOTTypeFromArrow(arrow::Type::type id) } } -TBranch* BranchToColumn::branch() -{ - return mBranch; -} - -BranchToColumn::BranchToColumn(TBranch* branch, bool VLA, std::string name, EDataType type, int listSize, arrow::MemoryPool* pool) - : mBranch{branch}, - mVLA{VLA}, - mColumnName{std::move(name)}, - mType{type}, - mArrowType{arrowTypeFromROOT(type, listSize)}, - mListSize{listSize}, - mPool{pool} - -{ - if (mType == EDataType::kBool_t) { - if (mListSize > 1) { - auto status = arrow::MakeBuilder(mPool, mArrowType->field(0)->type(), &mBuilder); - if (!status.ok()) { - throw runtime_error("Cannot create value builder"); - } - mListBuilder = std::make_unique(mPool, std::move(mBuilder), mListSize); - mValueBuilder = static_cast(mListBuilder.get())->value_builder(); - } else { - auto status = arrow::MakeBuilder(mPool, mArrowType, &mBuilder); - if (!status.ok()) { - throw runtime_error("Cannot create builder"); - } - mValueBuilder = mBuilder.get(); - } - } -} - -std::pair, std::shared_ptr> BranchToColumn::read(TBuffer* buffer) -{ - O2_SIGNPOST_ID_FROM_POINTER(sid, tabletree_helpers, buffer); - auto totalEntries = mBranch->GetEntries(); - arrow::Status status; - int readEntries = 0; - buffer->Reset(); - std::shared_ptr array; - - if (mType == EDataType::kBool_t) { - // boolean array special case: we need to use builder to create the bitmap - status = mValueBuilder->Reserve(totalEntries * mListSize); - if (mListSize > 1) { - status &= mListBuilder->Reserve(totalEntries); - } - if (!status.ok()) { - throw 
runtime_error("Failed to reserve memory for array builder"); - } - while (readEntries < totalEntries) { - auto readLast = mBranch->GetBulkRead().GetBulkEntries(readEntries, *buffer); - readEntries += readLast; - status &= static_cast(mValueBuilder)->AppendValues(reinterpret_cast(buffer->GetCurrent()), readLast * mListSize); - } - if (mListSize > 1) { - status &= static_cast(mListBuilder.get())->AppendValues(readEntries); - } - if (!status.ok()) { - throw runtime_error("Failed to append values to array"); - } - if (mListSize > 1) { - status &= mListBuilder->Finish(&array); - } else { - status &= mValueBuilder->Finish(&array); - } - if (!status.ok()) { - throw runtime_error("Failed to create array"); - } - } else { - // other types: use serialized read to build arrays directly - size_t branchSize = mBranch->GetTotBytes(); - auto&& result = arrow::AllocateResizableBuffer(mBranch->GetTotBytes(), mPool); - O2_SIGNPOST_EVENT_EMIT(tabletree_helpers, sid, "BranchToColumn", "Allocating %ld bytes for %{public}s", branchSize, mBranch->GetName()); - if (!result.ok()) { - throw runtime_error("Cannot allocate values buffer"); - } - std::shared_ptr arrowValuesBuffer = std::move(result).ValueUnsafe(); - auto ptr = arrowValuesBuffer->mutable_data(); - if (ptr == nullptr) { - throw runtime_error("Invalid buffer"); - } - - auto typeSize = TDataType::GetDataType(mType)->Size(); - std::unique_ptr offsetBuffer = nullptr; - - uint32_t offset = 0; - int count = 0; - std::shared_ptr arrowOffsetBuffer; - gsl::span offsets; - int size = 0; - uint32_t totalSize = 0; - TBranch* mSizeBranch = nullptr; - if (mVLA) { - mSizeBranch = mBranch->GetTree()->GetBranch((std::string{mBranch->GetName()} + TableTreeHelpers::sizeBranchSuffix).c_str()); - offsetBuffer = std::make_unique(TBuffer::EMode::kWrite, 4 * 1024 * 1024); - result = arrow::AllocateResizableBuffer((totalEntries + 1) * (int64_t)sizeof(int), mPool); - if (!result.ok()) { - throw runtime_error("Cannot allocate offset buffer"); - } - 
arrowOffsetBuffer = std::move(result).ValueUnsafe(); - unsigned char* ptrOffset = arrowOffsetBuffer->mutable_data(); - auto* tPtrOffset = reinterpret_cast(ptrOffset); - offsets = gsl::span{tPtrOffset, tPtrOffset + totalEntries + 1}; - - // read sizes first - while (readEntries < totalEntries) { - auto readLast = mSizeBranch->GetBulkRead().GetEntriesSerialized(readEntries, *offsetBuffer); - readEntries += readLast; - for (auto i = 0; i < readLast; ++i) { - offsets[count++] = (int)offset; - offset += swap32_(reinterpret_cast(offsetBuffer->GetCurrent())[i]); - } - } - offsets[count] = (int)offset; - totalSize = offset; - readEntries = 0; - } - - while (readEntries < totalEntries) { - auto readLast = mBranch->GetBulkRead().GetEntriesSerialized(readEntries, *buffer); - if (mVLA) { - size = offsets[readEntries + readLast] - offsets[readEntries]; - } else { - size = readLast * mListSize; - } - readEntries += readLast; - swapCopy(ptr, buffer->GetCurrent(), size, typeSize); - ptr += (ptrdiff_t)(size * typeSize); - } - if (!mVLA) { - totalSize = readEntries * mListSize; - } - std::shared_ptr varray; - switch (mListSize) { - case -1: - varray = std::make_shared(mArrowType->field(0)->type(), totalSize, arrowValuesBuffer); - array = std::make_shared(mArrowType, readEntries, arrowOffsetBuffer, varray); - break; - case 1: - array = std::make_shared(mArrowType, readEntries, arrowValuesBuffer); - break; - default: - varray = std::make_shared(mArrowType->field(0)->type(), totalSize, arrowValuesBuffer); - array = std::make_shared(mArrowType, readEntries, varray); - } - } - - auto fullArray = std::make_shared(array); - auto field = std::make_shared(mBranch->GetName(), mArrowType); - - mBranch->SetStatus(false); - mBranch->DropBaskets("all"); - mBranch->Reset(); - mBranch->GetTransientBuffer(0)->Expand(0); - - return std::make_pair(fullArray, field); -} - ColumnToBranch::ColumnToBranch(TTree* tree, std::shared_ptr const& column, std::shared_ptr const& field) : 
mBranchName{field->name()}, mColumn{column.get()}, @@ -447,11 +287,6 @@ std::shared_ptr TableToTree::process() return mTree; } -TreeToTable::TreeToTable(arrow::MemoryPool* pool) - : mArrowMemoryPool{pool} -{ -} - namespace { struct BranchInfo { @@ -461,113 +296,6 @@ struct BranchInfo { }; } // namespace -void TreeToTable::addAllColumns(TTree* tree, std::vector&& names) -{ - auto branches = tree->GetListOfBranches(); - auto n = branches->GetEntries(); - if (n == 0) { - throw runtime_error("Tree has no branches"); - } - - std::vector branchInfos; - for (auto i = 0; i < n; ++i) { - auto branch = static_cast(branches->At(i)); - auto name = std::string{branch->GetName()}; - auto pos = name.find(TableTreeHelpers::sizeBranchSuffix); - if (pos != std::string::npos) { - name.erase(pos); - branchInfos.emplace_back(BranchInfo{name, (TBranch*)nullptr, true}); - } else { - auto lookup = std::find_if(branchInfos.begin(), branchInfos.end(), [&](BranchInfo const& bi) { - return bi.name == name; - }); - if (lookup == branchInfos.end()) { - branchInfos.emplace_back(BranchInfo{name, branch, false}); - } else { - lookup->ptr = branch; - } - } - } - - if (names.empty()) { - for (auto& bi : branchInfos) { - addReader(bi.ptr, bi.name, bi.mVLA); - } - } else { - for (auto& name : names) { - auto lookup = std::find_if(branchInfos.begin(), branchInfos.end(), [&](BranchInfo const& bi) { - return name == bi.name; - }); - if (lookup != branchInfos.end()) { - addReader(lookup->ptr, lookup->name, lookup->mVLA); - } - } - if (names.size() != mBranchReaders.size()) { - LOGF(warn, "Not all requested columns were found in the tree"); - } - } - if (mBranchReaders.empty()) { - throw runtime_error("No columns will be read"); - } - // Was affected by https://github.com/root-project/root/issues/8962 - // Re-enabling this seems to cut the number of IOPS in half - tree->SetCacheSize(25000000); - // tree->SetClusterPrefetch(true); - for (auto& reader : mBranchReaders) { - 
tree->AddBranchToCache(reader->branch()); - if (strncmp(reader->branch()->GetName(), "fIndexArray", strlen("fIndexArray")) == 0) { - std::string sizeBranchName = reader->branch()->GetName(); - sizeBranchName += "_size"; - auto* sizeBranch = (TBranch*)tree->GetBranch(sizeBranchName.c_str()); - if (sizeBranch) { - tree->AddBranchToCache(sizeBranch); - } - } - } - tree->StopCacheLearningPhase(); -} - -void TreeToTable::setLabel(const char* label) -{ - mTableLabel = label; -} - -void TreeToTable::fill(TTree* tree) -{ - std::vector> columns; - std::vector> fields; - static TBufferFile buffer{TBuffer::EMode::kWrite, 4 * 1024 * 1024}; - O2_SIGNPOST_ID_FROM_POINTER(sid, tabletree_helpers, &buffer); - O2_SIGNPOST_START(tabletree_helpers, sid, "TreeToTable", "Filling %{public}s", tree->GetName()); - for (auto& reader : mBranchReaders) { - buffer.Reset(); - auto arrayAndField = reader->read(&buffer); - columns.push_back(arrayAndField.first); - fields.push_back(arrayAndField.second); - } - O2_SIGNPOST_END(tabletree_helpers, sid, "TreeToTable", "Done filling."); - - auto schema = std::make_shared(fields, std::make_shared(std::vector{std::string{"label"}}, std::vector{mTableLabel})); - mTable = arrow::Table::Make(schema, columns); -} - -void TreeToTable::addReader(TBranch* branch, std::string const& name, bool VLA) -{ - static TClass* cls; - EDataType type; - branch->GetExpectedType(cls, type); - auto listSize = -1; - if (!VLA) { - listSize = static_cast(branch->GetListOfLeaves()->At(0))->GetLenStatic(); - } - mBranchReaders.emplace_back(std::make_unique(branch, VLA, name, type, listSize, mArrowMemoryPool)); -} - -std::shared_ptr TreeToTable::finalize() -{ - return mTable; -} - FragmentToBatch::FragmentToBatch(StreamerCreator creator, std::shared_ptr fragment, arrow::MemoryPool* pool) : mFragment{std::move(fragment)}, mArrowMemoryPool{pool}, diff --git a/Framework/Core/test/benchmark_TreeToTable.cxx b/Framework/Core/test/benchmark_TreeToTable.cxx deleted file mode 100644 index 
6eca853a90ce4..0000000000000 --- a/Framework/Core/test/benchmark_TreeToTable.cxx +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#include "Framework/CommonDataProcessors.h" -#include "Framework/TableTreeHelpers.h" -#include "Framework/Logger.h" -#include -#include -#include - -#include - -using namespace o2::framework; -using namespace arrow; -using namespace o2::soa; - -namespace test -{ -DECLARE_SOA_COLUMN_FULL(X, x, float, "x"); -DECLARE_SOA_COLUMN_FULL(Y, y, float, "y"); -DECLARE_SOA_COLUMN_FULL(Z, z, float, "z"); -DECLARE_SOA_DYNAMIC_COLUMN(Sum, sum, [](float x, float y) { return x + y; }); -} // namespace test - -#ifdef __APPLE__ -constexpr unsigned int maxrange = 15; -#else -constexpr unsigned int maxrange = 16; -#endif - -static void BM_TreeToTable(benchmark::State& state) -{ - - // initialize a random generator - std::default_random_engine e1(1234567891); - std::uniform_real_distribution rd(0, 1); - std::normal_distribution rf(5., 2.); - std::discrete_distribution rl({10, 20, 30, 30, 5, 5}); - std::discrete_distribution ri({10, 20, 30, 30, 5, 5}); - - // create a table and fill the columns with random numbers - TableBuilder builder; - auto rowWriter = - builder.persist({"a", "b", "c", "d"}); - for (auto i = 0; i < state.range(0); ++i) { - rowWriter(0, rd(e1), rf(e1), rl(e1), ri(e1)); - } - auto table = builder.finalize(); - - // now convert the table to a tree - TFile fout("tree2table.root", "RECREATE"); - TableToTree 
ta2tr(table, &fout, "tree2table"); - ta2tr.addAllBranches(); - ta2tr.process(); - fout.Close(); - - // read tree and convert to table again - TFile* f = nullptr; - TreeToTable* tr2ta = nullptr; - for (auto _ : state) { - - // Open file and create tree - f = new TFile("tree2table.root", "READ"); - auto tr = (TTree*)f->Get("tree2table"); - - // benchmark TreeToTable - if (tr) { - tr2ta = new TreeToTable; - tr2ta->addAllColumns(tr); - tr2ta->fill(tr); - auto ta = tr2ta->finalize(); - } else { - LOG(info) << "tree is empty!"; - } - - // clean up - delete tr2ta; - - f->Close(); - delete f; - } - - state.SetBytesProcessed(state.iterations() * state.range(0) * 24); -} - -BENCHMARK(BM_TreeToTable)->Range(8, 8 << maxrange); - -BENCHMARK_MAIN(); diff --git a/Framework/Core/test/test_TreeToTable.cxx b/Framework/Core/test/test_TreeToTable.cxx deleted file mode 100644 index 4f3429a5bba62..0000000000000 --- a/Framework/Core/test/test_TreeToTable.cxx +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -#include - -#include "Framework/CommonDataProcessors.h" -#include "Framework/TableTreeHelpers.h" -#include "Framework/Logger.h" -#include "Framework/TableBuilder.h" - -#include -#include -#include -#include - -using namespace o2::framework; - -TEST_CASE("TreeToTableConversion") -{ - /// Create a simple TTree - Int_t ndp = 17; - - TFile f1("tree2table.root", "RECREATE"); - TTree t1("t1", "a simple Tree with simple variables"); - Bool_t ok, ts[5] = {false}; - Float_t px, py, pz; - Double_t random; - Int_t ev; - uint8_t b; - const Int_t nelem = 9; - Double_t ij[nelem] = {0}; - float xyzw[96]; - memset(xyzw, 1, 96 * 4); - TString leaflist = Form("ij[%i]/D", nelem); - - Int_t ncols = 10; - t1.Branch("ok", &ok, "ok/O"); - t1.Branch("px", &px, "px/F"); - t1.Branch("py", &py, "py/F"); - t1.Branch("pz", &pz, "pz/F"); - t1.Branch("random", &random, "random/D"); - t1.Branch("ev", &ev, "ev/I"); - t1.Branch("ij", ij, leaflist.Data()); - t1.Branch("tests", ts, "tests[5]/O"); - t1.Branch("xyzw", xyzw, "xyzw[96]/F"); - t1.Branch("small", &b, "small/b"); - - // fill the tree - int ntruein[2] = {0}; - for (int i = 0; i < ndp; i++) { - ok = (i % 2) == 0; - if (ok) { - ntruein[0]++; - } - gRandom->Rannor(px, py); - pz = px * px + py * py; - random = gRandom->Rndm(); - ev = i + 1; - b = i % 3; - for (Int_t jj = 0; jj < nelem; jj++) { - ij[jj] = i + 100 * jj; - } - for (Int_t jj = 0; jj < 5; jj++) { - ts[jj] = (((i + jj) % 2) == 0); - if (ts[jj]) { - ntruein[1]++; - } - } - - t1.Fill(); - } - t1.Write(); - - // Create an arrow table from this. 
- TreeToTable tr2ta; - tr2ta.addAllColumns(&t1); - tr2ta.fill(&t1); - auto table = tr2ta.finalize(); - f1.Close(); - - // test result - REQUIRE(table->Validate().ok() == true); - REQUIRE(table->num_rows() == ndp); - REQUIRE(table->num_columns() == ncols); - - REQUIRE(table->column(0)->type()->id() == arrow::Type::BOOL); - REQUIRE(table->column(1)->type()->id() == arrow::Type::FLOAT); - REQUIRE(table->column(2)->type()->id() == arrow::Type::FLOAT); - REQUIRE(table->column(3)->type()->id() == arrow::Type::FLOAT); - REQUIRE(table->column(4)->type()->id() == arrow::Type::DOUBLE); - REQUIRE(table->column(5)->type()->id() == arrow::Type::INT32); - REQUIRE(table->column(6)->type()->id() == arrow::Type::FIXED_SIZE_LIST); - REQUIRE(table->column(7)->type()->id() == arrow::Type::FIXED_SIZE_LIST); - REQUIRE(table->column(8)->type()->id() == arrow::Type::FIXED_SIZE_LIST); - REQUIRE(table->column(9)->type()->id() == arrow::Type::UINT8); - - REQUIRE(table->column(0)->type()->Equals(arrow::boolean())); - REQUIRE(table->column(1)->type()->Equals(arrow::float32())); - REQUIRE(table->column(2)->type()->Equals(arrow::float32())); - REQUIRE(table->column(3)->type()->Equals(arrow::float32())); - REQUIRE(table->column(4)->type()->Equals(arrow::float64())); - REQUIRE(table->column(5)->type()->Equals(arrow::int32())); - REQUIRE(table->column(6)->type()->Equals(arrow::fixed_size_list(arrow::float64(), nelem))); - REQUIRE(table->column(7)->type()->Equals(arrow::fixed_size_list(arrow::boolean(), 5))); - REQUIRE(table->column(8)->type()->Equals(arrow::fixed_size_list(arrow::float32(), 96))); - REQUIRE(table->column(9)->type()->Equals(arrow::uint8())); - - // count number of rows with ok==true - int ntrueout = 0; - auto chunks = table->column(0); - REQUIRE(!(chunks.get() == nullptr)); - - auto oks = std::dynamic_pointer_cast(chunks->chunk(0)); - REQUIRE(!(oks.get() == nullptr)); - - for (int ii = 0; ii < table->num_rows(); ii++) { - ntrueout += oks->Value(ii) ? 
1 : 0; - } - REQUIRE(ntruein[0] == ntrueout); - - // count number of ts with ts==true - chunks = table->column(7); - REQUIRE(!(chunks.get() == nullptr)); - - auto chunkToUse = std::static_pointer_cast(chunks->chunk(0))->values(); - REQUIRE(!(chunkToUse.get() == nullptr)); - - auto tests = std::dynamic_pointer_cast(chunkToUse); - ntrueout = 0; - for (int ii = 0; ii < table->num_rows() * 5; ii++) { - ntrueout += tests->Value(ii) ? 1 : 0; - } - REQUIRE(ntruein[1] == ntrueout); - - // save table as tree - TFile* f2 = TFile::Open("table2tree.root", "RECREATE"); - TableToTree ta2tr(table, f2, "mytree"); - ta2tr.addAllBranches(); - - auto t2 = ta2tr.process(); - auto br = (TBranch*)t2->GetBranch("ok"); - REQUIRE(t2->GetEntries() == ndp); - REQUIRE(br->GetEntries() == ndp); - br = (TBranch*)t2->GetBranch("tests"); - REQUIRE(br->GetEntries() == ndp); - - f2->Close(); -} - -namespace o2::aod -{ -namespace cols -{ -DECLARE_SOA_COLUMN(Ivec, ivec, std::vector); -DECLARE_SOA_COLUMN(Fvec, fvec, std::vector); -DECLARE_SOA_COLUMN(Dvec, dvec, std::vector); -DECLARE_SOA_COLUMN(UIvec, uivec, std::vector); -} // namespace cols - -DECLARE_SOA_TABLE(Vectors, "AOD", "VECS", o2::soa::Index<>, cols::Ivec, cols::Fvec, cols::Dvec, cols::UIvec); -} // namespace o2::aod - -TEST_CASE("VariableLists") -{ - TableBuilder b; - auto writer = b.cursor(); - std::vector iv; - std::vector fv; - std::vector dv; - std::vector ui; - - std::array empty = {3, 7, 10}; - auto count = 0; - for (auto i = 1; i < 1000; ++i) { - iv.clear(); - fv.clear(); - dv.clear(); - ui.clear(); - if (count < empty.size() && i != empty[count]) { - for (auto j = 0; j < i % 10 + 1; ++j) { - iv.push_back(j + 2); - fv.push_back((j + 2) * 0.2134f); - dv.push_back((j + 4) * 0.192873819237); - ui.push_back(j); - } - } else { - count++; - } - writer(0, iv, fv, dv, ui); - } - auto table = b.finalize(); - - auto* f = TFile::Open("variable_lists.root", "RECREATE"); - TableToTree ta2tr(table, f, "lists"); - ta2tr.addAllBranches(); - auto 
tree = ta2tr.process(); - f->Close(); - - auto* f2 = TFile::Open("variable_lists.root", "READ"); - auto* treeptr = static_cast(f2->Get("lists;1")); - TreeToTable tr2ta; - tr2ta.addAllColumns(treeptr); - tr2ta.fill(treeptr); - auto ta = tr2ta.finalize(); - o2::aod::Vectors v{ta}; - int i = 1; - count = 0; - for (auto& row : v) { - auto ivr = row.ivec(); - auto fvr = row.fvec(); - auto dvr = row.dvec(); - auto uvr = row.uivec(); - if (count < empty.size() && i != empty[count]) { - for (auto j = 0; j < i % 10 + 1; ++j) { - REQUIRE(ivr[j] == j + 2); - REQUIRE(fvr[j] == (j + 2) * 0.2134f); - REQUIRE(dvr[j] == (j + 4) * 0.192873819237); - REQUIRE(uvr[j] == j); - } - } else { - REQUIRE(ivr.size() == 0); - REQUIRE(fvr.size() == 0); - REQUIRE(dvr.size() == 0); - REQUIRE(uvr.size() == 0); - count++; - } - ++i; - } -} From ccb26194cc88a5c1f9a352037caa8cd601a5fb0a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 10 Mar 2025 15:14:53 +0100 Subject: [PATCH 0022/1764] GPU TPC: Fix filtering check (#14032) --- .../Global/GPUChainTrackingCompression.cxx | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 57a759a58924e..03d319f42fd6b 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -201,12 +201,14 @@ int32_t GPUChainTracking::RunTPCCompression() int32_t GPUChainTracking::RunTPCDecompression() { - const bool runFullFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0); - if (runFullFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { + const bool needFullFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0); + const bool runTimeBinCutFiltering = param().tpcCutTimeBin > 0; + if 
(needFullFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { GPUFatal("tpcApplyCFCutsAtDecoding, tpcApplyClusterFilterOnCPU and tpcCutTimeBin currently require tpcUseOldCPUDecoding"); } if (GetProcessingSettings().tpcUseOldCPUDecoding) { + const bool runFiltering = needFullFiltering || runTimeBinCutFiltering; const auto& threadContext = GetThreadContext(); TPCClusterDecompressor decomp; auto allocatorFinal = [this](size_t size) { @@ -219,13 +221,13 @@ int32_t GPUChainTracking::RunTPCDecompression() return ((tmpBuffer = std::make_unique(size))).get(); }; auto& decompressTimer = getTimer("TPCDecompression", 0); - auto allocatorUse = runFullFiltering ? std::function{allocatorTmp} : std::function{allocatorFinal}; + auto allocatorUse = runFiltering ? std::function{allocatorTmp} : std::function{allocatorFinal}; decompressTimer.Start(); if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) { GPUError("Error decompressing clusters"); return 1; } - if (runFullFiltering) { + if (runFiltering) { RunTPCClusterFilter(mClusterNativeAccess.get(), allocatorFinal, GetProcessingSettings().tpcApplyCFCutsAtDecoding); } decompressTimer.Stop(); @@ -245,7 +247,6 @@ int32_t GPUChainTracking::RunTPCDecompression() mRec->PushNonPersistentMemory(qStr2Tag("TPCDCMPR")); RecoStep myStep = RecoStep::TPCDecompression; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCDecompression; - bool runTimeBinCutFiltering = param().tpcCutTimeBin > 0; GPUTPCDecompression& Decompressor = processors()->tpcDecompressor; GPUTPCDecompression& DecompressorShadow = doGPU ? 
processorsShadow()->tpcDecompressor : Decompressor; const auto& threadContext = GetThreadContext(); From d2bd60f0d78e9f80807f03f740c7baa488068970 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 10 Mar 2025 11:20:34 +0100 Subject: [PATCH 0023/1764] GPU HIP Cmake: Get rid of hipcc, use Clang --cuda-device-only instead --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt | 6 ++---- GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index e85a3c3e9e1f3..7fb3744551953 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -28,7 +28,7 @@ #ifndef __HIPCC__ // CUDA #define PER_KERNEL_OBJECT_EXT _fatbin #else // HIP -#define PER_KERNEL_OBJECT_EXT _hip_cxx_o +#define PER_KERNEL_OBJECT_EXT _hip_o #endif #define GPUCA_KRNL(x_class, ...) 
QGET_LD_BINARY_SYMBOLS(GPUCA_M_CAT3(cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), PER_KERNEL_OBJECT_EXT)) #include "GPUReconstructionKernelList.h" diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index f6e420d5b9656..30f6683ff93c5 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -122,7 +122,7 @@ add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ add_custom_command( OUTPUT ${GPU_RTC_BIN}.command - COMMAND echo -n "${hip_HIPCC_EXECUTABLE} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} --genco" > ${GPU_RTC_BIN}.command + COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command" @@ -159,7 +159,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") install(FILES ${HDRS} DESTINATION include/GPU) # o2_add_test(GPUsortHIP NAME test_GPUsortHIP -# SOURCES test/testGPUsortHIP.hip.cxx +# SOURCES test/testGPUsortHIP.hip # PUBLIC_LINK_LIBRARIES O2::GPUCommon hip::host hip::device hip::hipcub roc::rocthrust # COMPONENT_NAME GPU # LABELS gpu) diff --git a/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt b/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt index 15b72379fdfa7..789333eea9f04 100644 --- a/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt @@ -9,7 +9,5 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. 
-add_library(GPUTrackingHIPKernels OBJECT $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.hip.cxx>, >) -set(CMAKE_CXX_COMPILER ${hip_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_FLAGS "${GPU_RTC_FLAGS} ${GPU_RTC_FLAGS_ARCH} --genco") -unset(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}) +add_library(GPUTrackingHIPKernels OBJECT $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.hip>, >) +set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} --cuda-device-only") diff --git a/GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx b/GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx index 822af77bce48c..ed13124ef65df 100644 --- a/GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx +++ b/GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file testGPUsortHIP.hip.cxx +/// \file testGPUsortHIP.hip /// \author Michael Lettrich #define GPUCA_GPUTYPE_VEGA diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index ff003eca78948..b05fa19785dd8 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -96,7 +96,7 @@ function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) endif() if(HIP_ENABLED) - set(TMP_FILENAMEA "${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${TMP_FILENAME}.hip.cxx") + set(TMP_FILENAMEA "${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${TMP_FILENAME}.hip") set(O2_GPU_KERNEL_TEMPLATE_REPLACE "${TMP_KERNEL}") configure_file(${O2_GPU_BASE_DIR}/Base/hip/GPUReconstructionHIPkernel.template.hip ${TMP_FILENAMEA}) endif() From 9607305b9fed276904133fbd2487da73b3b3b41c Mon Sep 17 00:00:00 2001 From: Diego Stocco Date: Tue, 11 Mar 2025 09:07:48 +0100 Subject: [PATCH 0024/1764] Add MID local board hardware name in the mapping info (#14034) --- 
Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx | 2 ++ .../include/MIDGlobalMapping/ExtendedMappingInfo.h | 1 + Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx b/Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx index 0086bf6b4caa5..f8efd6fddb79d 100644 --- a/Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx +++ b/Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx @@ -57,6 +57,8 @@ void stripsInfo2json(const std::vector& infos, con writer.Int(infos[idx].locId); writer.Key("locIdDcs"); writer.String(infos[idx].locIdDcs.c_str()); + writer.Key("locIdHw"); + writer.String(infos[idx].locIdHw.c_str()); writer.EndObject(); } writer.EndArray(); diff --git a/Detectors/MUON/MID/GlobalMapping/include/MIDGlobalMapping/ExtendedMappingInfo.h b/Detectors/MUON/MID/GlobalMapping/include/MIDGlobalMapping/ExtendedMappingInfo.h index f05b2d6acba1f..1153f75c774ba 100644 --- a/Detectors/MUON/MID/GlobalMapping/include/MIDGlobalMapping/ExtendedMappingInfo.h +++ b/Detectors/MUON/MID/GlobalMapping/include/MIDGlobalMapping/ExtendedMappingInfo.h @@ -34,6 +34,7 @@ struct ExtendedMappingInfo { int cathode; ///< Bending (0) or Non-bending (1) planes int locId; ///< Local board ID std::string locIdDcs; ///< Local board ID for DCS + std::string locIdHw; /// Local board ID in the hardware int xpos; ///< Position X int ypos; ///< Position Y int xwidth; ///< Width X (signed) diff --git a/Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx b/Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx index aebaade01f963..eeb17759197c9 100644 --- a/Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx +++ b/Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx @@ -79,7 +79,10 @@ std::array GlobalMapper::getStripGeom(int deId, int columnId, int lineId ExtendedMappingInfo GlobalMapper::buildExtendedInfo(int deId, int columnId, int lineId, int stripId, int cathode) const { 
ExtendedMappingInfo info; + std::array boards{"12", "34", "56", "78"}; info.id = getStripId(deId, columnId, lineId, stripId, cathode); + int irpc = detparams::getRPCLine(deId); + int iline = (irpc == 5 && columnId == 0) ? lineId - 1 : lineId; auto locId = static_cast(mCrateMapper.deLocalBoardToRO(deId, columnId, lineId)); info.locId = locId; std::string side = detparams::isRightSide(deId) ? "R" : "L"; @@ -92,6 +95,7 @@ ExtendedMappingInfo GlobalMapper::buildExtendedInfo(int deId, int columnId, int info.stripId = stripId; info.cathode = cathode; info.locIdDcs = fmt::format("{}{}{}{}", crateId, side, (locInCrate >= 8 ? "1" : "0"), locInCrate); + info.locIdHw = fmt::format("{}{}C{}L{}B{}", detparams::getChamber(deId) + 1, side, columnId + 1, irpc + 1, boards[iline]); auto geom = getStripGeom(deId, columnId, lineId, stripId, cathode); info.xpos = geom[0]; info.ypos = geom[1]; From 41c8f04218623fc065fe6f85eb7d8f96f215cf33 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 11 Mar 2025 10:15:27 +0100 Subject: [PATCH 0025/1764] Drop need for Framework/RootTableBuilderHelpers.h (#14036) --- .../src/AODJAlienReaderHelpers.cxx | 1 - Framework/Core/CMakeLists.txt | 5 - .../Framework/RootTableBuilderHelpers.h | 233 ------------------ Framework/Core/src/AODReaderHelpers.cxx | 1 - Framework/Core/src/verifyAODFile.cxx | 54 ---- Framework/Core/test/test_Root2ArrowTable.cxx | 136 ---------- 6 files changed, 430 deletions(-) delete mode 100644 Framework/Core/include/Framework/RootTableBuilderHelpers.h delete mode 100644 Framework/Core/src/verifyAODFile.cxx diff --git a/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx index f8a9705e4eb62..85ed9cd573d8a 100644 --- a/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx @@ -14,7 +14,6 @@ #include "Framework/TableTreeHelpers.h" #include 
"Framework/AnalysisHelpers.h" #include "Framework/DataProcessingStats.h" -#include "Framework/RootTableBuilderHelpers.h" #include "Framework/RootArrowFilesystem.h" #include "Framework/AlgorithmSpec.h" #include "Framework/ConfigParamRegistry.h" diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index f059984b5d85d..2691d9d33a0c6 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -328,11 +328,6 @@ o2_add_executable(dpl-run PUBLIC_LINK_LIBRARIES O2::Framework ) -o2_add_executable(verify-aod-file - SOURCES src/verifyAODFile.cxx - PUBLIC_LINK_LIBRARIES O2::Framework ROOT::TreePlayer - COMPONENT_NAME Framework) - # benchmarks foreach(b diff --git a/Framework/Core/include/Framework/RootTableBuilderHelpers.h b/Framework/Core/include/Framework/RootTableBuilderHelpers.h deleted file mode 100644 index 0fa818084a5a4..0000000000000 --- a/Framework/Core/include/Framework/RootTableBuilderHelpers.h +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -#ifndef o2_framework_RootTableBuilderHelpers_H_INCLUDED -#define o2_framework_RootTableBuilderHelpers_H_INCLUDED - -#include "Framework/TableBuilder.h" -#include "Framework/Logger.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace o2::framework -{ - -template -struct TreeReaderValueTraits { -}; - -/// Trait class to go from a set of TTreeReaderValues to -/// arrow types. -template -struct TreeReaderValueTraits> { - using Type = typename TTreeReaderValue::NonConstT_t; - using ArrowType = typename o2::framework::detail::ConversionTraits::ArrowType; - using BuilderType = typename arrow::TypeTraits::BuilderType; -}; - -template -struct TreeReaderValueTraits> { - using Iterator = typename TTreeReaderArray::iterator; - using Type = std::pair; - using ArrowType = arrow::ListType; -}; - -static constexpr int PREBUFFER_SIZE = 32 * 1024; - -// When reading from a ROOT file special care must happen -// because uint64_t is platform specific while ULong64_t is -// always long long unsigned int (same for the signed version). -// By using this traits we make sure that any 64 bit quantity -// read from a root file uses the ROOT datatype, not the platform one. 
-template -struct Remap64Bit { - using type = T; -}; - -template <> -struct Remap64Bit { - using type = Long64_t; -}; - -template <> -struct Remap64Bit { - using type = ULong64_t; -}; - -template -struct Remap64Bit { - using type = Long64_t[N]; -}; - -template -struct Remap64Bit { - using type = ULong64_t[N]; -}; - -template -using Remap64Bit_t = typename Remap64Bit::type; - -template -struct ReaderHolder { - using Reader = TTreeReaderValue; - using Type = T; - - ReaderHolder(TBranch* branch, std::unique_ptr reader_) - : reader{std::move(reader_)} - { - } - - ReaderHolder(ReaderHolder&& other) - : reader{std::move(other.reader)}, - pos{other.pos} - { - } - - ReaderHolder& operator=(ReaderHolder&& other) = delete; - - std::unique_ptr reader; - int pos = 0; - Remap64Bit_t buffer[PREBUFFER_SIZE]; - int itemSize = sizeof(T); -}; - -template -struct ReaderHolder { - using Reader = TTreeReaderArray; - using Type = T (&)[N]; - - ReaderHolder(TBranch* branch, std::unique_ptr reader_) - : reader{std::move(reader_)} - { - } - - ReaderHolder(ReaderHolder&& other) - : reader{std::move(other.reader)}, - pos{other.pos} - { - } - - ReaderHolder& operator=(ReaderHolder&& other) = delete; - - std::unique_ptr reader; - int pos = 0; - Remap64Bit_t buffer[PREBUFFER_SIZE * N]; - int itemSize = sizeof(T) * N; -}; - -struct BulkExtractor { - template - static auto deref(ReaderHolder& holder, size_t maxSize) - { - holder.buffer[holder.pos % PREBUFFER_SIZE] = **holder.reader; - holder.pos++; - if (holder.pos == maxSize) { - return BulkInfo const*>{holder.buffer, maxSize % PREBUFFER_SIZE}; - } - // We flush only after PREBUFFER_SIZE items have been inserted - if ((holder.pos % PREBUFFER_SIZE) != 0) { - return BulkInfo const*>{nullptr, 0}; - } - return BulkInfo const*>{holder.buffer, PREBUFFER_SIZE}; - } - - template - static auto deref(ReaderHolder& holder, size_t maxSize) - { - memcpy(&holder.buffer[(holder.pos % PREBUFFER_SIZE) * N], &((*holder.reader)[0]), N * sizeof(T)); - holder.pos++; 
- if (holder.pos == maxSize) { - return BulkInfo const*>{holder.buffer, maxSize % PREBUFFER_SIZE}; - } - // We flush only after PREBUFFER_SIZE items have been inserted - if ((holder.pos % PREBUFFER_SIZE) != 0) { - return BulkInfo const*>{nullptr, 0}; - } - return BulkInfo const*>{reinterpret_cast(holder.buffer), PREBUFFER_SIZE}; - } -}; - -template -struct HolderMaker { - static auto make(TTreeReader& reader, char const* branchName) - { - using Reader = TTreeReaderValue; - return ReaderHolder{reader.GetTree()->GetBranch(branchName), std::move(std::make_unique(reader, branchName))}; - } -}; - -template -struct HolderMaker { - static auto make(TTreeReader& reader, char const* branchName) - { - using Reader = TTreeReaderArray; - return ReaderHolder{reader.GetTree()->GetBranch(branchName), std::move(std::make_unique(reader, branchName))}; - } -}; - -template -struct ColumnReaderTrait { - static auto createReader(TTreeReader& reader) - { - return HolderMaker>::make(reader, C::base::columnLabel()); - } -}; - -struct RootTableBuilderHelpers { - /// Use bulk insertion when TTreeReaderValue everywhere - template - static void convertTTree(TableBuilder& builder, - TTreeReader& reader, - ReaderHolder... 
holders) - { - std::array branchNames = {holders.reader->GetBranchName()...}; - TTree* tree = reader.GetTree(); - size_t maxExtries = reader.GetEntries(true); - tree->SetCacheSize(maxExtries * (holders.itemSize + ...)); - (tree->AddBranchToCache(tree->GetBranch(holders.reader->GetBranchName()), true), ...); - tree->StopCacheLearningPhase(); - - auto filler = builder.bulkPersistChunked::Type>...>(branchNames, maxExtries); - while (reader.Next()) { - filler(0, BulkExtractor::deref(holders, maxExtries)...); - } - } - - template - static void convertASoAColumns(TableBuilder& builder, TTreeReader& reader, pack) - { - return convertTTree(builder, reader, ColumnReaderTrait::createReader(reader)...); - } - - template - static void convertASoA(TableBuilder& builder, TTreeReader& reader) - { - return convertASoAColumns(builder, reader, typename T::persistent_columns_t{}); - } -}; - -} // namespace o2 -#endif // FRAMEWORK_ROOTTABLEBUILDERHELPERS_H diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index f6513b5facea3..6270d07a022e8 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -15,7 +15,6 @@ #include "Framework/AnalysisDataModelHelpers.h" #include "Framework/DataProcessingHelpers.h" #include "Framework/ExpressionHelpers.h" -#include "Framework/RootTableBuilderHelpers.h" #include "Framework/AlgorithmSpec.h" #include "Framework/ConfigParamRegistry.h" #include "Framework/ControlService.h" diff --git a/Framework/Core/src/verifyAODFile.cxx b/Framework/Core/src/verifyAODFile.cxx deleted file mode 100644 index 2660019031946..0000000000000 --- a/Framework/Core/src/verifyAODFile.cxx +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#include "Framework/AnalysisDataModel.h" -#include "Framework/RootTableBuilderHelpers.h" -#include "Framework/Logger.h" -#include "Framework/ASoA.h" -#include -#include -#include - -using namespace o2::framework; -using namespace o2::soa; - -template -void verifyTable(TFile* infile, const char* branchName) -{ - std::cout << "Table: " << o2::aod::label() << std::endl; - std::unique_ptr reader = std::make_unique(branchName, infile); - TableBuilder builder; - RootTableBuilderHelpers::convertASoA(builder, *reader); - auto table = builder.finalize(); - std::cout << table->schema()->ToString() << std::endl; - std::cout << "---" << std::endl; -} - -int main(int argc, char** argv) -{ - if (argc != 2) { - LOG(error) << "Bad number of arguments"; - return 1; - } - auto infile = std::make_unique(argv[1]); - if (infile.get() == nullptr || infile->IsOpen() == false) { - LOG(error) << "File not found: " << argv[1]; - return 1; - } - - verifyTable(infile.get(), "O2collision"); - verifyTable(infile.get(), "O2track"); - verifyTable(infile.get(), "O2track"); - verifyTable(infile.get(), "O2track"); - verifyTable(infile.get(), "O2calo"); - verifyTable(infile.get(), "O2fwdtrack"); - return 0; -} diff --git a/Framework/Core/test/test_Root2ArrowTable.cxx b/Framework/Core/test/test_Root2ArrowTable.cxx index 663be91a1e6f3..395048ae916d6 100644 --- a/Framework/Core/test/test_Root2ArrowTable.cxx +++ b/Framework/Core/test/test_Root2ArrowTable.cxx @@ -12,7 +12,6 @@ #include #include "Framework/TableBuilder.h" -#include "Framework/RootTableBuilderHelpers.h" #include "Framework/ASoA.h" #include "Framework/PluginManager.h" #include 
"../src/ArrowDebugHelpers.h" @@ -50,87 +49,6 @@ using namespace o2::framework; -TEST_CASE("RootTree2Table") -{ - using namespace o2::framework; - /// Create a simple TTree - TTree t1("t1", "a simple Tree with simple variables"); - Float_t xyz[3]; - Int_t ij[2]; - Float_t px, py, pz; - Double_t random; - Int_t ev; - t1.Branch("px", &px, "px/F"); - t1.Branch("py", &py, "py/F"); - t1.Branch("pz", &pz, "pz/F"); - t1.Branch("random", &random, "random/D"); - t1.Branch("ev", &ev, "ev/I"); - t1.Branch("xyz", xyz, "xyz[3]/F"); - t1.Branch("ij", ij, "ij[2]/I"); - // fill the tree - for (Int_t i = 0; i < 1000; i++) { - xyz[0] = 1; - xyz[1] = 2; - xyz[2] = 3; - gRandom->Rannor(px, py); - pz = px * px + py * py; - xyz[2] = i + 1; - ij[0] = i; - ij[1] = i + 1; - random = gRandom->Rndm(); - ev = i + 1; - t1.Fill(); - } - - // Create an arrow table from this. - TableBuilder builder; - TTreeReader reader(&t1); - auto&& xyzReader = HolderMaker::make(reader, "xyz"); - auto&& ijkReader = HolderMaker::make(reader, "ij"); - auto&& pxReader = HolderMaker::make(reader, "px"); - auto&& pyReader = HolderMaker::make(reader, "py"); - auto&& pzReader = HolderMaker::make(reader, "pz"); - auto&& randomReader = HolderMaker::make(reader, "random"); - auto&& evReader = HolderMaker::make(reader, "ev"); - - RootTableBuilderHelpers::convertTTree(builder, reader, std::move(xyzReader), std::move(ijkReader), std::move(pxReader), std::move(pyReader), std::move(pzReader), std::move(randomReader), std::move(evReader)); - auto table = builder.finalize(); - REQUIRE(table->num_rows() == 1000); - REQUIRE(table->num_columns() == 7); - REQUIRE(table->schema()->field(0)->type()->id() == arrow::fixed_size_list(arrow::float32(), 3)->id()); - REQUIRE(table->schema()->field(1)->type()->id() == arrow::fixed_size_list(arrow::int32(), 2)->id()); - REQUIRE(table->schema()->field(2)->type()->id() == arrow::float32()->id()); - REQUIRE(table->schema()->field(3)->type()->id() == arrow::float32()->id()); - 
REQUIRE(table->schema()->field(4)->type()->id() == arrow::float32()->id()); - REQUIRE(table->schema()->field(5)->type()->id() == arrow::float64()->id()); - REQUIRE(table->schema()->field(6)->type()->id() == arrow::int32()->id()); - - { - auto chunkToUse = table->column(0)->chunk(0); - chunkToUse = std::dynamic_pointer_cast(chunkToUse)->values(); - auto array = std::static_pointer_cast(chunkToUse); - // array of 3 floats, time 1000. - REQUIRE(array->length() == 3000); - const float* c = reinterpret_cast(array->values()->data()); - - CHECK(c[0] == 1); - CHECK(c[1] == 2); - CHECK(c[2] == 1); - } - { - auto chunkToUse = table->column(1)->chunk(0); - chunkToUse = std::dynamic_pointer_cast(chunkToUse)->values(); - auto array = std::static_pointer_cast(chunkToUse); - REQUIRE(array->length() == 2000); - - const int* ptr = reinterpret_cast(array->values()->data()); - for (size_t i = 0; i < 1000; i++) { - CHECK(ptr[2 * i + 0] == i); - CHECK(ptr[2 * i + 1] == i + 1); - } - } -} - namespace o2::aod { namespace test @@ -149,60 +67,6 @@ DECLARE_SOA_TABLE(Test, "AOD", "ETAPHI", test::Random, test::Ev); } // namespace o2::aod -TEST_CASE("RootTree2TableViaASoA") -{ - using namespace o2::framework; - /// Create a simple TTree - TTree t2("t2", "a simple Tree with simple variables"); - Float_t xyz[3]; - Int_t ij[2]; - Float_t px, py, pz; - Double_t random; - Int_t ev; - t2.Branch("px", &px, "px/F"); - t2.Branch("py", &py, "py/F"); - t2.Branch("pz", &pz, "pz/F"); - t2.Branch("random", &random, "random/D"); - t2.Branch("ev", &ev, "ev/I"); - t2.Branch("xyz", xyz, "xyz[3]/F"); - t2.Branch("ij", ij, "ij[2]/I"); - // fill the tree - for (Int_t i = 0; i < 1000; i++) { - gRandom->Rannor(xyz[0], xyz[1]); - gRandom->Rannor(px, py); - pz = px * px + py * py; - xyz[2] = i + 1; - ij[0] = i; - ij[1] = i + 1; - random = gRandom->Rndm(); - ev = i + 1; - t2.Fill(); - } - - // Create an arrow table from this. 
- TableBuilder builder; - TTreeReader reader(&t2); - REQUIRE(t2.GetEntries() == 1000); - - RootTableBuilderHelpers::convertASoA(builder, reader); - auto table = builder.finalize(); - REQUIRE(table->num_rows() == 1000); - REQUIRE(table->num_columns() == 7); - REQUIRE(table->column(0)->type()->id() == arrow::float32()->id()); - REQUIRE(table->column(1)->type()->id() == arrow::float32()->id()); - REQUIRE(table->column(2)->type()->id() == arrow::float32()->id()); - REQUIRE(table->column(3)->type()->id() == arrow::fixed_size_list(arrow::float32(), 3)->id()); - REQUIRE(table->column(4)->type()->id() == arrow::fixed_size_list(arrow::int32(), 2)->id()); - REQUIRE(table->column(5)->type()->id() == arrow::float64()->id()); - REQUIRE(table->column(6)->type()->id() == arrow::int32()->id()); - - o2::aod::Test testTable{table}; - for (auto& row : testTable) { - REQUIRE(row.ij()[0] == row.ij()[1] - 1); - REQUIRE(row.ij()[1] == row.ev()); - } -} - TEST_CASE("RootTree2Fragment") { using namespace o2::framework; From 83bcaaadc6bd8378afb18db7f56d11812b60060c Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 11 Mar 2025 12:55:37 +0100 Subject: [PATCH 0026/1764] DPL: provide defaults for inputs and outputs (#14038) Silence a bunch of warnings when using aggregate initialization. 
--- Framework/Core/include/Framework/DataProcessorSpec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Framework/Core/include/Framework/DataProcessorSpec.h b/Framework/Core/include/Framework/DataProcessorSpec.h index fafb7fda43ce3..9821a2561d08b 100644 --- a/Framework/Core/include/Framework/DataProcessorSpec.h +++ b/Framework/Core/include/Framework/DataProcessorSpec.h @@ -40,8 +40,8 @@ struct DataProcessorMetadata { struct DataProcessorSpec { std::string name; - Inputs inputs; - Outputs outputs; + Inputs inputs = {}; + Outputs outputs = {}; AlgorithmSpec algorithm; Options options = {}; From a8f75744fd7d4078a834ff71891cf306937c8c86 Mon Sep 17 00:00:00 2001 From: Sergio Garcia <47090312+singiamtel@users.noreply.github.com> Date: Tue, 11 Mar 2025 13:43:03 +0100 Subject: [PATCH 0027/1764] Bump actions version (#14017) > Error: This request has been automatically failed because it uses a deprecated version of `actions/cache: v2`. Please update your workflow to use v3/v4 of actions/cache to avoid interruptions --- .github/workflows/reports.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reports.yml b/.github/workflows/reports.yml index 0762debd04d54..cadb920fa022f 100644 --- a/.github/workflows/reports.yml +++ b/.github/workflows/reports.yml @@ -17,12 +17,12 @@ jobs: if: github.repository == 'AliceO2Group/AliceO2' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.10 uses: actions/setup-python@v5 with: python-version: '3.10' - - uses: actions/cache@v2 + - uses: actions/cache@v4 name: Configure pip caching with: path: ~/.cache/pip From 48c7605979462cfeed6a3944133a755cd0f5ca44 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 11 Mar 2025 17:14:30 +0100 Subject: [PATCH 0028/1764] DPL Analysis: improve error message when messages are malformed (#14040) --- Framework/AnalysisSupport/src/AODWriterHelpers.cxx | 13 +++++++------ 
1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx index fa10d4661f537..2b1b4f880d1ee 100644 --- a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx @@ -274,19 +274,20 @@ AlgorithmSpec AODWriterHelpers::getOutputObjHistWriter(ConfigContext const& ctx) LOG(error) << "Header not found"; return; } - if (!ref.payload) { - LOG(error) << "Payload not found"; - return; - } auto datah = o2::header::get(ref.header); if (!datah) { LOG(error) << "No data header in stack"; return; } + if (!ref.payload) { + LOGP(error, "Payload not found for {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } + auto objh = o2::header::get(ref.header); if (!objh) { - LOG(error) << "No output object header in stack"; + LOGP(error, "No output object header in stack of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); return; } @@ -297,7 +298,7 @@ AlgorithmSpec AODWriterHelpers::getOutputObjHistWriter(ConfigContext const& ctx) tm.SetBufferOffset(0); tm.ResetMap(); if (obj.kind == nullptr) { - LOG(error) << "Cannot read class info from buffer."; + LOGP(error, "Cannot read class info from buffer of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); return; } From cfa7b71004811813df8cf27450047d7e427bc1a9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 15:00:18 +0100 Subject: [PATCH 0029/1764] dpl-workflow: Automatically apply MI100 workaround in sync --- prodtests/full-system-test/dpl-workflow.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 0f5083dbcdebb..f9b0c7accbff9 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh 
@@ -259,7 +259,8 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi - [[ "${EPN_NODE_MI100:-}" != "1" ]] && export HSA_NO_SCRATCH_RECLAIM=1 + [[ ${EPN_NODE_MI100:-} != "1" ]] && export HSA_NO_SCRATCH_RECLAIM=1 + [[ $EPNSYNCMODE == 1 && ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From fc1fd7b0174cb3821c8c9616474e696f0cd30dde Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 11 Mar 2025 13:51:52 +0100 Subject: [PATCH 0030/1764] Fix scaling ITS CA tracker params for low Bfield --- Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 4 +--- GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index f625b77a013b0..cbb31ff8bceb7 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -120,17 +120,15 @@ void ITSTrackingInterface::initialise() for (auto& params : trackParams) { params.CorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrLUT; } - // adjust pT settings to actual mag. 
field for (size_t ip = 0; ip < trackParams.size(); ip++) { auto& param = trackParams[ip]; + param.TrackletMinPt *= bFactor; for (int ilg = trackConf.MaxTrackLenght; ilg >= trackConf.MinTrackLenght; ilg--) { int lslot = trackConf.MaxTrackLenght - ilg; param.MinPt[lslot] *= bFactor; - param.TrackletMinPt *= bFactor; } } - mTracker->setParameters(trackParams); mVertexer->setParameters(vertParams); } diff --git a/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx b/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx index b81e816d6fc1d..1601e11f2c6fa 100644 --- a/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx +++ b/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx @@ -22,7 +22,7 @@ #include "ITStracking/Cell.h" #include "CommonConstants/MathConstants.h" -#ifdef CA_DEBUG +#if defined(CA_DEBUG) && !defined(GPUCA_GPUCODE_DEVICE) #include #endif From c5209b138c1f44fc5cb374d3538eb732506bbdc7 Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 11 Mar 2025 16:12:57 +0100 Subject: [PATCH 0031/1764] Do not fetch meta-data object unless reading explicit ccdb snapshot file --- CCDB/include/CCDB/CcdbApi.h | 2 +- CCDB/src/CcdbApi.cxx | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CCDB/include/CCDB/CcdbApi.h b/CCDB/include/CCDB/CcdbApi.h index 5ad56fbd50557..1308742b57fd0 100644 --- a/CCDB/include/CCDB/CcdbApi.h +++ b/CCDB/include/CCDB/CcdbApi.h @@ -388,7 +388,7 @@ class CcdbApi //: public DatabaseInterface static bool removeSemaphore(std::string const& name, bool remove = false); static void removeLeakingSemaphores(std::string const& basedir, bool remove = false); - void loadFileToMemory(o2::pmr::vector& dest, const std::string& path, std::map* localHeaders = nullptr) const; + void loadFileToMemory(o2::pmr::vector& dest, const std::string& path, std::map* localHeaders = nullptr, bool fetchLocalMetaData = true) const; void loadFileToMemory(o2::pmr::vector& dest, std::string const& path, std::map const& metadata, long timestamp, std::map* headers, std::string const& etag, diff 
--git a/CCDB/src/CcdbApi.cxx b/CCDB/src/CcdbApi.cxx index 2906438211c65..c9d2fad882aa1 100644 --- a/CCDB/src/CcdbApi.cxx +++ b/CCDB/src/CcdbApi.cxx @@ -1989,7 +1989,7 @@ bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& { if (url.find("alien:/", 0) != std::string::npos) { std::map localHeaders; - loadFileToMemory(dest, url, &localHeaders); + loadFileToMemory(dest, url, &localHeaders, false); auto it = localHeaders.find("Error"); if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { return false; @@ -2001,7 +2001,7 @@ bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& std::string path = url.substr(7); if (std::filesystem::exists(path)) { std::map localHeaders; - loadFileToMemory(dest, url, &localHeaders); + loadFileToMemory(dest, url, &localHeaders, o2::utils::Str::endsWith(path, ".root")); auto it = localHeaders.find("Error"); if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { return false; @@ -2013,7 +2013,7 @@ bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& return false; } -void CcdbApi::loadFileToMemory(o2::pmr::vector& dest, const std::string& path, std::map* localHeaders) const +void CcdbApi::loadFileToMemory(o2::pmr::vector& dest, const std::string& path, std::map* localHeaders, bool fetchLocalMetaData) const { // Read file to memory as vector. 
For special case of the locally cached file retriev metadata stored directly in the file constexpr size_t MaxCopySize = 0x1L << 25; @@ -2061,7 +2061,7 @@ void CcdbApi::loadFileToMemory(o2::pmr::vector& dest, const std::string& p totalread += nread; } while (nread == (long)MaxCopySize); - if (localHeaders) { + if (localHeaders && fetchLocalMetaData) { TMemFile memFile("name", const_cast(dest.data()), dest.size(), "READ"); auto storedmeta = (std::map*)extractFromTFile(memFile, TClass::GetClass("std::map"), CCDBMETA_ENTRY); if (storedmeta) { From ce065f9788b822d83f3154613b5bd9aa41ada987 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 08:58:07 +0100 Subject: [PATCH 0032/1764] GPU: Add IsNaN to CAMath --- GPU/Common/GPUCommonMath.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 9aa260f59842f..ef837658f74d1 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -82,6 +82,7 @@ class GPUCommonMath GPUd() static int32_t Float2IntRn(float x); GPUd() static float Modf(float x, float y); GPUd() static bool Finite(float x); + GPUd() static bool IsNaN(float x); GPUd() static uint32_t Clz(uint32_t val); GPUd() static uint32_t Popcount(uint32_t val); @@ -224,7 +225,8 @@ GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x) #ifdef GPUCA_NO_FAST_MATH GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); } +GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); } // Fixme: fix these 2 for OpenCL +GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), false); } GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), 
(float)sqrt((double)x), sqrt(x)); } GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } From 8b6b16c6e9b861066c7607dde33663ff881187c7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 11:36:06 +0100 Subject: [PATCH 0033/1764] GPU Math: Add memcpy and QuietNaN, fix Finite and IsNaN --- GPU/Common/GPUCommonMath.h | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index ef837658f74d1..5a813b74ed7b6 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -25,6 +25,8 @@ #include #include #include +#include +#include #endif #if !defined(GPUCA_GPUCODE_COMPILEKERNELS) && (!defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__)) @@ -83,9 +85,14 @@ class GPUCommonMath GPUd() static float Modf(float x, float y); GPUd() static bool Finite(float x); GPUd() static bool IsNaN(float x); + GPUd() static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() static float QuietNaN(); GPUd() static uint32_t Clz(uint32_t val); GPUd() static uint32_t Popcount(uint32_t val); + GPUd() static void memcpy(void* dst, const void* src, size_t size); + GPUhdni() static float Hypot(float x, float y); GPUhdni() static float Hypot(float x, float y, float z); GPUhdni() static float Hypot(float x, float y, float z, float w); @@ -181,6 +188,23 @@ typedef GPUCommonMath CAMath; #define CHOICE(c1, c2, c3) (c1) // Select first option for Host #endif // clang-format on +GPUdi() void GPUCommonMath::memcpy(void* dst, const void* src, size_t size) +{ +#ifndef GPUCA_GPUCODE_DEVICE + std::memcpy(dst, src, size); +#elif 
defined(__CUDACC__) || defined(__HIPCC__) + ::memcpy(dst, src, size); +#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + __builtin_memcpy(dst, src, size); +#else + char* d = (char*)dst; + const char* s = (const char*)src; + for (size_t i = 0; i < size; i++) { + d[i] = s[i]; + } +#endif +} + template GPUdi() constexpr T GPUCommonMath::nextMultipleOf(T val) { @@ -222,11 +246,12 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) GPUdi() uint32_t GPUCommonMath::Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); } +GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } +GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), isnan(x)); } +GPUdi() float GPUCommonMath::QuietNaN() { return CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } #ifdef GPUCA_NO_FAST_MATH GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); } // Fixme: fix these 2 for OpenCL -GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), false); } GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } @@ -238,10 +263,11 @@ GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE((float)asin((double)x GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE((float)acos((double)x), 
(float)acos((double)x), acos(x)); } GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x), (float)log((double)x), log(x)); } GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } +GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } +GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } #else GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), true, true); } GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); } GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } @@ -253,6 +279,8 @@ GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE(asinf(x), asinf(x), a GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE(acosf(x), acosf(x), acos(x)); } GPUdi() float GPUCommonMath::Log(float x) { return CHOICE(logf(x), logf(x), log(x)); } GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE(expf(x), expf(x), exp(x)); } +GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return true; } +GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } #endif GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) From 2dbd7f888467d52b54ebc8d24c4826f495d85440 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Wed, 12 Mar 2025 13:31:58 +0100 Subject: [PATCH 0034/1764] Ctpdev (#14037) * dev: ctp config to BK and first orbit and orbit reset via zmq * clang --- .../include/CTPWorkflowScalers/RunManager.h | 2 +- .../CTPWorkflowScalers/ctpCCDBManager.h | 13 ++-- .../CTP/workflowScalers/src/RunManager.cxx | 
37 ++++++++---- .../workflowScalers/src/ctpCCDBManager.cxx | 59 +++++++++++++++++++ 4 files changed, 94 insertions(+), 17 deletions(-) diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h index 0d624ecd8b892..e0b204e6c4ce5 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h +++ b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h @@ -47,7 +47,7 @@ class CTPRunManager : public ctpCCDBManager CTPRunManager() = default; void init(); int loadRun(const std::string& cfg); - int startRun(const std::string& cfg); + int setRunConfigBK(uint32_t runNumber, const std::string& cfg); int stopRun(uint32_t irun, long timeStamp); int addScalers(uint32_t irun, std::time_t time, bool start = 0); int processMessage(std::string& topic, const std::string& message); diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h index 5fb6d3678f0ba..c968a83183624 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h +++ b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h @@ -27,12 +27,11 @@ class ctpCCDBManager int saveRunScalersToCCDB(CTPRunScalers& scalers, long timeStart, long timeStop); int saveRunScalersToQCDB(CTPRunScalers& scalers, long timeStart, long timeStop); int saveRunConfigToCCDB(CTPConfiguration* cfg, long timeStart); + int saveSoxOrbit(uint32_t runNumber, uint32_t soxOrbit, long timeStart); + int saveOrbitReset(long timeStamp); static CTPConfiguration getConfigFromCCDB(long timestamp, std::string run, bool& ok); static CTPConfiguration getConfigFromCCDB(long timestamp, std::string run); CTPRunScalers getScalersFromCCDB(long timestamp, std::string, bool& ok); - void setCCDBPathConfig(std::string path) { mCCDBPathCTPConfig = path; }; - void setCCDBPathScalers(std::string 
path) { mCCDBPathCTPScalers = path; }; - void setQCDBPathScalers(std::string path) { mQCDBPathCTPScalers = path; }; static void setCCDBHost(std::string host) { mCCDBHost = host; }; static void setQCDBHost(std::string host) { mQCDBHost = host; }; @@ -42,9 +41,11 @@ class ctpCCDBManager // std::string mQCDBHost = "http://ali-qcdb.cern.ch:8083"; static std::string mCCDBHost; static std::string mQCDBHost; - std::string mCCDBPathCTPScalers = "CTP/Calib/Scalers"; - std::string mCCDBPathCTPConfig = "CTP/Config/Config"; - std::string mQCDBPathCTPScalers = "qc/CTP/Scalers"; + const std::string mCCDBPathCTPScalers = "CTP/Calib/Scalers"; + // std::string mCCDBPathCTPConfig = "CTP/Config/Config"; - in Configuration.h + const std::string mQCDBPathCTPScalers = "qc/CTP/Scalers"; + const std::string mCCDBPathSoxOrbit = "CTP/Calib/FirstRunOrbit"; + const std::string mCCDBPathOrbitReset = "CTP/Calib/OrbitReset"; ClassDefNV(ctpCCDBManager, 1); }; } // namespace ctp diff --git a/Detectors/CTP/workflowScalers/src/RunManager.cxx b/Detectors/CTP/workflowScalers/src/RunManager.cxx index e6861e6cb4b38..9af5b5e104120 100644 --- a/Detectors/CTP/workflowScalers/src/RunManager.cxx +++ b/Detectors/CTP/workflowScalers/src/RunManager.cxx @@ -57,7 +57,7 @@ int CTPActiveRun::send2BK(std::unique_ptr& BKClient, size_t ts, bool std::string clsname = cfg.getClassNameFromHWIndex(cls.first); // clsname = std::to_string(runOri) + "_" + clsname; try { - BKClient->triggerCounters()->createOrUpdateForRun(runNumber, clsname, ts, cntsbk[0], cntsbk[1], cntsbk[2], cntsbk[3], cntsbk[4], cntsbk[5]); + BKClient->ctpTriggerCounters()->createOrUpdateForRun(runNumber, clsname, ts, cntsbk[0], cntsbk[1], cntsbk[2], cntsbk[3], cntsbk[4], cntsbk[5]); } catch (std::runtime_error& error) { std::cerr << "An error occurred: " << error.what() << std::endl; return 1; @@ -124,8 +124,19 @@ int CTPRunManager::loadRun(const std::string& cfg) return 0; } -int CTPRunManager::startRun(const std::string& cfg) +int 
CTPRunManager::setRunConfigBK(uint32_t runNumber, const std::string& cfg) { + std::cout << "Printing cfg:" << cfg << std::endl; + if (mBKClient) { + try { + uint32_t runNumber = 1; + mBKClient->run()->setRawCtpTriggerConfiguration(runNumber, cfg); + } catch (std::runtime_error& error) { + std::cerr << "An error occurred: " << error.what() << std::endl; + return 1; + } + LOG(info) << "Run BK:" << runNumber << " CFG:" << cfg; + } return 0; } int CTPRunManager::stopRun(uint32_t irun, long timeStamp) @@ -221,6 +232,13 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message loadRun(message); return 0; } + if (topic.find("soxorbit") != std::string::npos) { + return 0; + } + if (topic.find("orbitreset") != std::string::npos) { + return 0; + } + static int nerror = 0; if (topic.find("sox") != std::string::npos) { // get config size_t irun = message.find("run"); @@ -230,17 +248,15 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message } LOG(info) << "SOX received, Run keyword position:" << irun; std::string cfg = message.substr(irun, message.size() - irun); - startRun(cfg); firstcounters = message.substr(0, irun); - } - if (topic.find("eox") != std::string::npos) { + } else if (topic.find("eox") != std::string::npos) { LOG(info) << "EOX received"; mEOX = 1; - } - static int nerror = 0; - if (topic == "rocnts") { - if (nerror < 1) { - LOG(warning) << "Skipping topic rocnts"; + } else if (topic.find("cnts") != std::string::npos) { + // just continue + } else { + if (nerror < 10) { + LOG(warning) << "Skipping topic:" << topic; nerror++; } return 0; @@ -293,6 +309,7 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message mActiveRunNumbers[i] = mCounters[i]; mActiveRuns[i] = run->second; mRunsLoaded.erase(run); + setRunConfigBK(mActiveRuns[i]->cfg.getRunNumber(), mActiveRuns[i]->cfg.getConfigString()); addScalers(i, tt, 1); saveRunScalersToQCDB(mActiveRuns[i]->scalers, tt * 1000, tt * 1000); } 
else { diff --git a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx index 3484cb97279b5..0d81b896b3e91 100644 --- a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx +++ b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx @@ -107,6 +107,65 @@ int ctpCCDBManager::saveRunConfigToCCDB(CTPConfiguration* cfg, long timeStart) } return ret; } +int ctpCCDBManager::saveSoxOrbit(uint32_t runNumber, uint32_t soxOrbit, long timestamp) +{ + // data base + if (mCCDBHost == "none") { + LOG(info) << "SOX Orbit not written to CCDB none"; + return 0; + } + std::vector vect; + if (timestamp == 0) { + auto now = std::chrono::system_clock::now(); + timestamp = std::chrono::duration_cast(now.time_since_epoch()).count(); + } + vect.push_back(timestamp); + vect.push_back((uint64_t)runNumber); + vect.push_back((uint64_t)soxOrbit); + long tmin = timestamp; + long tmax = tmin + 381928219; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + metadata["runNumber"] = std::to_string(runNumber); + api.init(mCCDBHost.c_str()); // or http://localhost:8080 for a local installation + + // store abitrary user object in strongly typed manner + int ret = api.storeAsTFileAny(&vect, mCCDBPathSoxOrbit, metadata, tmin, tmax); + if (ret == 0) { + LOG(info) << "SOX orbit saved in ccdb:" << mCCDBHost << " run:" << runNumber << " tmin:" << tmin << " tmax:" << tmax; + } else { + LOG(fatal) << "SOX orbit Problem writing to database ret:" << ret; + } + return 0; +} +int ctpCCDBManager::saveOrbitReset(long timeStamp) +{ + // data base + if (mCCDBHost == "none") { + LOG(info) << "Orbit Reset not written to CCDB none"; + return 0; + } + std::vector vect; + if (timeStamp == 0) { + auto now = std::chrono::system_clock::now(); + timeStamp = std::chrono::duration_cast(now.time_since_epoch()).count(); + } + vect.push_back(timeStamp); + long tmin = timeStamp; + long tmax = tmin + 381928219; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + 
api.init(mCCDBHost.c_str()); // or http://localhost:8080 for a local installation + + // store abitrary user object in strongly typed manner + int ret = api.storeAsTFileAny(&vect, mCCDBPathOrbitReset, metadata, tmin, tmax); + if (ret == 0) { + LOG(info) << "Orbit reset saved in ccdb:" << mCCDBHost << " tmin:" << tmin << " tmax:" << tmax; + } else { + LOG(fatal) << "Orbit reset Problem writing to database ret:" << ret; + } + return 0; +} CTPConfiguration ctpCCDBManager::getConfigFromCCDB(long timestamp, std::string run, bool& ok) { auto& mgr = o2::ccdb::BasicCCDBManager::instance(); From fb4de2054a035146aa88a425cc46b1d54f10a63d Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 12 Mar 2025 13:59:23 +0100 Subject: [PATCH 0035/1764] write pairs correlation tree only if requested --- Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index 36530bfe9238b..db57ad5f8a7eb 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -591,9 +591,8 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) pr.nshTPCRow = shinfo.second; } } + (*mDBGOut) << "pairs" << "pr=" << trcPairsVec << "\n"; } - (*mDBGOut) << "pairs" - << "pr=" << trcPairsVec << "\n"; } int nvtot = mMaxNeighbours < 0 ? 
-1 : (int)pveVec.size(); From cf94b28f0d092a4e1351bb303143923f9b5487f7 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 12 Mar 2025 14:56:33 +0100 Subject: [PATCH 0036/1764] Do not scale ITS tracking cuts for B=0 --- Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index cbb31ff8bceb7..b264ac46bc7b3 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -38,6 +38,9 @@ void ITSTrackingInterface::initialise() std::vector trackParams; const auto& trackConf = o2::its::TrackerParamConfig::Instance(); float bFactor = std::abs(o2::base::Propagator::Instance()->getNominalBz()) / 5.0066791; + if (bFactor < 0.01) { + bFactor = 1.; + } if (mMode == TrackingMode::Unset) { mMode = (TrackingMode)(trackConf.trackingMode); LOGP(info, "Tracking mode not set, trying to fetch it from configurable params to: {}", asString(mMode)); From 7486f59b97f80adc24644b949fcd4671c4045339 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 15:08:37 +0100 Subject: [PATCH 0037/1764] dpl-workflow: automatically apply MI100 workaround in async --- prodtests/full-system-test/dpl-workflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index f9b0c7accbff9..76235d127037a 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -260,7 +260,7 @@ if [[ $GPUTYPE == "HIP" ]]; then GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi [[ ${EPN_NODE_MI100:-} != "1" ]] && export HSA_NO_SCRATCH_RECLAIM=1 - [[ $EPNSYNCMODE == 1 && ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" + [[ $EPNSYNCMODE == 1 || ! 
-z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From fe6cd7c0a1239cfbbb257da2c1e18a61cc15adae Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Wed, 12 Mar 2025 16:52:13 +0100 Subject: [PATCH 0038/1764] DataModel: make V0s IsStandard explicit (#13937) This makes the check more explicit to really have a standard v0. If analysers just ask for this bit, for example they still would get tpc-only v0s. @ddobrigk --- Framework/Core/include/Framework/AnalysisDataModel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/AnalysisDataModel.h b/Framework/Core/include/Framework/AnalysisDataModel.h index 8c9f323f3dcc6..70db8500e3421 100644 --- a/Framework/Core/include/Framework/AnalysisDataModel.h +++ b/Framework/Core/include/Framework/AnalysisDataModel.h @@ -1596,7 +1596,7 @@ DECLARE_SOA_INDEX_COLUMN(Collision, collision); //! Coll DECLARE_SOA_COLUMN(V0Type, v0Type, uint8_t); //! custom bitmap for various selections (see below) DECLARE_SOA_DYNAMIC_COLUMN(IsStandardV0, isStandardV0, //! is standard V0 - [](uint8_t V0Type) -> bool { return V0Type & (1 << 0); }); + [](uint8_t V0Type) -> bool { return V0Type == 1; }); DECLARE_SOA_DYNAMIC_COLUMN(IsPhotonV0, isPhotonV0, //! is TPC-only V0 for which the photon-mass-hypothesis was good [](uint8_t V0Type) -> bool { return V0Type & (1 << 1); }); DECLARE_SOA_DYNAMIC_COLUMN(IsCollinearV0, isCollinearV0, //! 
is V0 for which the photon-mass-hypothesis was good and was fitted collinearly From f0d987e4be494d043bfb05500d31978d663ce796 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 23:46:59 +0100 Subject: [PATCH 0039/1764] GPU QA: Fix debug ROOT dump if we wrote a different ROOT file meanwhile --- GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx b/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx index 846c85e8e1cb3..7155b783e725e 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx +++ b/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx @@ -30,6 +30,7 @@ GPUROOTDumpCore::GPUROOTDumpCore(GPUROOTDumpCore::GPUROOTDumpCorePrivate) GPUROOTDumpCore::~GPUROOTDumpCore() { if (mFile) { + mFile->cd(); for (uint32_t i = 0; i < mBranches.size(); i++) { mBranches[i]->write(); } From 5c6657a7c1843c1a152eda3c7d2776e4ee14785c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 23:49:58 +0100 Subject: [PATCH 0040/1764] GPU QA: Add histogram with number of rows in which primary TPC track has clusters --- GPU/GPUTracking/qa/GPUQA.cxx | 125 +++++++++++++++++++++-------------- GPU/GPUTracking/qa/GPUQA.h | 8 +-- 2 files changed, 80 insertions(+), 53 deletions(-) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 552c82f1bd299..ba7aeb3800a5e 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -525,8 +525,10 @@ int32_t GPUQA::InitQACreateHistograms() if (mQATasks & taskTrackStatistics) { // Create Tracks Histograms - snprintf(name, 2048, "nclusters"); - createHist(mNCl, name, name, 160, 0, 159); + for (int32_t i = 0; i < 2; i++) { + snprintf(name, 2048, i ? 
"nrows_with_cluster" : "nclusters"); + createHist(mNCl[i], name, name, 160, 0, 159); + } snprintf(name, 2048, "tracks"); std::unique_ptr binsPt{CreateLogAxis(AXIS_BINS[4], PT_MIN_CLUST, PT_MAX)}; createHist(mTracks, name, name, AXIS_BINS[4], binsPt.get()); @@ -895,7 +897,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx mTrackMCLabelsReverse[iCol][i] = -1; } } - if (mQATasks & taskClusterAttach) { + if (mQATasks & taskClusterAttach && GetNMCLabels()) { mClusterParam.resize(GetNMCLabels()); memset(mClusterParam.data(), 0, mClusterParam.size() * sizeof(mClusterParam[0])); } @@ -1661,7 +1663,25 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx continue; } mTracks->Fill(1.f / fabsf(track.GetParam().GetQPt())); - mNCl->Fill(track.NClustersFitted()); + mNCl[0]->Fill(track.NClustersFitted()); + uint32_t nClCorrected = 0; + int32_t lastSector = -1, lastRow = -1; + const auto& trackClusters = mTracking->mIOPtrs.mergedTrackHits; + for (uint32_t j = 0; j < track.NClusters(); j++) { + if (trackClusters[track.FirstClusterRef() + j].state & GPUTPCGMMergedTrackHit::flagReject) { + continue; + } + if (trackClusters[track.FirstClusterRef() + j].sector == lastSector && trackClusters[track.FirstClusterRef() + j].row == lastRow) { + continue; + } + if (trackClusters[track.FirstClusterRef() + j].leg != trackClusters[track.FirstClusterRef() + track.NClusters() - 1].leg) { + continue; + } + nClCorrected++; + lastSector = trackClusters[track.FirstClusterRef() + j].sector; + lastRow = trackClusters[track.FirstClusterRef() + j].sector; + } + mNCl[1]->Fill(nClCorrected); } if (mClNative && mTracking && mTracking->GetTPCTransformHelper()) { for (uint32_t i = 0; i < GPUChainTracking::NSECTORS; i++) { @@ -2055,12 +2075,15 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) mLTracks = createGarbageCollected(0.9 - legendSpacingString * 1.45, 0.93 - (0.93 - 0.86) / 2. 
* (float)ConfigNumInputs, 0.98, 0.949); SetLegend(mLTracks); - mCNCl = createGarbageCollected("cncl", "Number of clusters per track", 0, 0, 700, 700. * 2. / 3.); - mCNCl->cd(); - mPNCl = createGarbageCollected("p0", "", 0.0, 0.0, 1.0, 1.0); - mPNCl->Draw(); - mLNCl = createGarbageCollected(0.9 - legendSpacingString * 1.45, 0.93 - (0.93 - 0.86) / 2. * (float)ConfigNumInputs, 0.98, 0.949); - SetLegend(mLNCl); + for (int32_t i = 0; i < 2; i++) { + snprintf(name, 2048, "cncl%d Pull", i); + mCNCl[i] = createGarbageCollected(name, i ? "Number of clusters (corrected for multiple per row)" : "Number of clusters per track", 0, 0, 700, 700. * 2. / 3.); + mCNCl[i]->cd(); + mPNCl[i] = createGarbageCollected("p0", "", 0.0, 0.0, 1.0, 1.0); + mPNCl[i]->Draw(); + mLNCl[i] = createGarbageCollected(0.9 - legendSpacingString * 1.45, 0.93 - (0.93 - 0.86) / 2. * (float)ConfigNumInputs, 0.98, 0.949); + SetLegend(mLNCl[i]); + } mCClXY = createGarbageCollected("clxy", "Number of clusters per X / Y", 0, 0, 700, 700. * 2. 
/ 3.); mCClXY->cd(); @@ -2696,47 +2719,51 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) mCTracks->Print("plots/tracks.root"); } - tmpMax = 0.; - for (int32_t k = 0; k < ConfigNumInputs; k++) { - TH1F* e = mNCl; - if (GetHist(e, tin, k, nNewInput) == nullptr) { - continue; - } - e->SetMaximum(-1111); - if (e->GetMaximum() > tmpMax) { - tmpMax = e->GetMaximum(); - } - } - mPNCl->cd(); - for (int32_t k = 0; k < ConfigNumInputs; k++) { - TH1F* e = mNCl; - if (GetHist(e, tin, k, nNewInput) == nullptr) { - continue; - } - if (tout && !mConfig.inputHistogramsOnly && k == 0) { - e->Write(); + for (int32_t i = 0; i < 2; i++) { + tmpMax = 0.; + for (int32_t k = 0; k < ConfigNumInputs; k++) { + TH1F* e = mNCl[i]; + if (GetHist(e, tin, k, nNewInput) == nullptr) { + continue; + } + e->SetMaximum(-1111); + if (e->GetMaximum() > tmpMax) { + tmpMax = e->GetMaximum(); + } } - e->SetMaximum(tmpMax * 1.02); - e->SetMinimum(tmpMax * -0.02); - e->SetStats(kFALSE); - e->SetLineWidth(1); - e->GetYaxis()->SetTitle("a.u."); - e->GetXaxis()->SetTitle("NClusters"); - if (qcout) { - qcout->Add(e); + mPNCl[i]->cd(); + for (int32_t k = 0; k < ConfigNumInputs; k++) { + TH1F* e = mNCl[i]; + if (GetHist(e, tin, k, nNewInput) == nullptr) { + continue; + } + if (tout && !mConfig.inputHistogramsOnly && k == 0) { + e->Write(); + } + e->SetMaximum(tmpMax * 1.02); + e->SetMinimum(tmpMax * -0.02); + e->SetStats(kFALSE); + e->SetLineWidth(1); + e->GetYaxis()->SetTitle("a.u."); + e->GetXaxis()->SetTitle("NClusters"); + if (qcout) { + qcout->Add(e); + } + e->SetMarkerColor(kBlack); + e->SetLineColor(colorNums[k % COLORCOUNT]); + e->Draw(k == 0 ? "" : "same"); + GetName(fname, k); + snprintf(name, 2048, "%sNClusters%d", fname, i); + mLNCl[i]->AddEntry(e, name, "l"); + } + mLNCl[i]->Draw(); + mCNCl[i]->cd(); + snprintf(name, 2048, "plots/nClusters%s.pdf", i ? "_corrected" : ""); + mCNCl[i]->Print(name); + if (mConfig.writeRootFiles) { + snprintf(name, 2048, "plots/nClusters%s.root", i ? 
"_corrected" : ""); + mCNCl[i]->Print(name); } - e->SetMarkerColor(kBlack); - e->SetLineColor(colorNums[k % COLORCOUNT]); - e->Draw(k == 0 ? "" : "same"); - GetName(fname, k); - snprintf(name, 2048, "%sNClusters", fname); - mLNCl->AddEntry(e, name, "l"); - } - mLNCl->Draw(); - mCNCl->cd(); - mCNCl->Print("plots/nClusters.pdf"); - if (mConfig.writeRootFiles) { - mCNCl->Print("plots/nClusters.root"); } mPClXY->cd(); diff --git a/GPU/GPUTracking/qa/GPUQA.h b/GPU/GPUTracking/qa/GPUQA.h index 32b0553700f90..87900b5279ec0 100644 --- a/GPU/GPUTracking/qa/GPUQA.h +++ b/GPU/GPUTracking/qa/GPUQA.h @@ -299,10 +299,10 @@ class GPUQA TPad* mPTracks; TLegend* mLTracks; - TH1F* mNCl; - TCanvas* mCNCl; - TPad* mPNCl; - TLegend* mLNCl; + TH1F* mNCl[2]; + TCanvas* mCNCl[2]; + TPad* mPNCl[2]; + TLegend* mLNCl[2]; TH2F* mClXY; TCanvas* mCClXY; From 641977cccfa17710faaca7c18bbb7e607957b232 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Wed, 5 Mar 2025 18:41:31 +0100 Subject: [PATCH 0041/1764] Fixing handling of edge clusters Adapting edge correction Fixing edge handling Please consider the following formatting changes Fix for right edge check --- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx | 7 ++++++- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h | 2 +- GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index d145aaed705d9..a826cdf71f575 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -97,7 +97,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d) return q; } -GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, TPCTime timeOffset, const GPUTPCGeometry& geo) +GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, 
TPCTime timeOffset, const GPUTPCGeometry& geo, Charge* padBoundaryCharges) { mQtot += q; @@ -116,6 +116,11 @@ GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, TPCTime if (CfUtils::isAtEdge(pos, geo.NPads(pos.row()))) { bool leftEdge = (pad < 2); bool correct = (leftEdge) ? (pad < mPadMean) : (pad > mPadMean); + if (leftEdge && pad == 1) { // only check charge at boundary if maximum is at least one pad away from boundary + correct = correct && (padBoundaryCharges[0] > 0); // Only correct if cluster is asymmetric with charge > 0 towards sector boundary, otherwise all charge is found + } else if (!leftEdge && pad == (geo.NPads(pos.row()) - 2)) { + correct = correct && (padBoundaryCharges[1] > 0); + } mPadMean = (correct) ? pad : mPadMean; } } diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index 26decbf0a5b14..c409a6cced3a5 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -40,7 +40,7 @@ class ClusterAccumulator GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2); GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2); - GPUd() void finalize(const ChargePos&, tpccf::Charge, tpccf::TPCTime, const GPUTPCGeometry&); + GPUd() void finalize(const ChargePos&, tpccf::Charge, tpccf::TPCTime, const GPUTPCGeometry&, tpccf::Charge*); GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&) const; private: diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index 1aeae812f5193..f28e80aa08201 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -58,6 +58,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t ChargePos pos = filteredPeakPositions[CAMath::Min(idx, 
clusternum - 1)]; Charge charge = chargeMap[pos].unpack(); + Charge padBoundaryCharges[2] = {chargeMap[pos.delta({-1, 0})].unpack(), chargeMap[pos.delta({1, 0})].unpack()}; + ClusterAccumulator pc; CPU_ONLY(labelAcc->collect(pos, charge)); @@ -80,7 +82,7 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t } return; } - pc.finalize(pos, charge, fragment.start, clusterer.Param().tpcGeometry); + pc.finalize(pos, charge, fragment.start, clusterer.Param().tpcGeometry, padBoundaryCharges); tpc::ClusterNative myCluster; bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param()); From 46445fa21a058dd94017a350be1b8f74cf55d476 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 19:26:48 +0100 Subject: [PATCH 0042/1764] GPU TPC CF: Use corrected check also for edge cluster tagging --- GPU/GPUTracking/TPCClusterFinder/CfUtils.h | 5 -- .../TPCClusterFinder/ClusterAccumulator.cxx | 89 +++++++++---------- .../TPCClusterFinder/ClusterAccumulator.h | 4 +- .../TPCClusterFinder/GPUTPCCFClusterizer.cxx | 6 +- 4 files changed, 45 insertions(+), 59 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h index 4504b8288aee0..75dcc166abd9b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h @@ -27,11 +27,6 @@ class CfUtils { public: - static GPUdi() bool isAtEdge(const ChargePos& pos, tpccf::GlobalPad padsPerRow) - { - return (pos.pad() < 2 || pos.pad() >= padsPerRow - 2); - } - static GPUdi() bool innerAboveThreshold(uint8_t aboveThreshold, uint16_t outerIdx) { return aboveThreshold & (1 << cfconsts::OuterToInner[outerIdx]); diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index a826cdf71f575..77dc6e119df7d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ 
-21,45 +21,6 @@ using namespace o2::gpu; using namespace o2::gpu::tpccf; -GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param) const -{ - cn.qTot = CAMath::Float2UIntRn(mQtot); - if (cn.qTot <= param.rec.tpc.cfQTotCutoff) { - return false; - } - cn.qMax = q; - if (cn.qMax <= param.rec.tpc.cfQMaxCutoff) { - return false; - } - if (mTimeMean < param.rec.tpc.clustersShiftTimebinsClusterizer) { - return false; - } - if (q <= param.rec.tpc.cfQMaxCutoffSingleTime && mTimeSigma == 0) { - return false; - } - if (q <= param.rec.tpc.cfQMaxCutoffSinglePad && mPadSigma == 0) { - return false; - } - - bool isEdgeCluster = CfUtils::isAtEdge(pos, param.tpcGeometry.NPads(pos.row())); - bool wasSplitInTime = mSplitInTime >= param.rec.tpc.cfMinSplitNum; - bool wasSplitInPad = mSplitInPad >= param.rec.tpc.cfMinSplitNum; - bool isSingleCluster = (mPadSigma == 0) || (mTimeSigma == 0); - - uint8_t flags = 0; - flags |= (isEdgeCluster) ? tpc::ClusterNative::flagEdge : 0; - flags |= (wasSplitInTime) ? tpc::ClusterNative::flagSplitTime : 0; - flags |= (wasSplitInPad) ? tpc::ClusterNative::flagSplitPad : 0; - flags |= (isSingleCluster) ? 
tpc::ClusterNative::flagSingle : 0; - - cn.setTimeFlags(mTimeMean - param.rec.tpc.clustersShiftTimebinsClusterizer, flags); - cn.setPad(mPadMean); - cn.setSigmaTime(mTimeSigma); - cn.setSigmaPad(mPadSigma); - - return true; -} - GPUd() void ClusterAccumulator::update(Charge splitCharge, Delta2 d) { mQtot += splitCharge; @@ -97,7 +58,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d) return q; } -GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, TPCTime timeOffset, const GPUTPCGeometry& geo, Charge* padBoundaryCharges) +GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param, TPCTime timeOffset, const Array2D& chargeMap) { mQtot += q; @@ -113,14 +74,48 @@ GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, TPCTime mPadMean += pad; mTimeMean += timeOffset + pos.time(); - if (CfUtils::isAtEdge(pos, geo.NPads(pos.row()))) { + bool isEdgeCluster = pos.pad() < 2 || pos.pad() >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge + if (isEdgeCluster) { bool leftEdge = (pad < 2); - bool correct = (leftEdge) ? (pad < mPadMean) : (pad > mPadMean); - if (leftEdge && pad == 1) { // only check charge at boundary if maximum is at least one pad away from boundary - correct = correct && (padBoundaryCharges[0] > 0); // Only correct if cluster is asymmetric with charge > 0 towards sector boundary, otherwise all charge is found - } else if (!leftEdge && pad == (geo.NPads(pos.row()) - 2)) { - correct = correct && (padBoundaryCharges[1] > 0); + if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { + isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge. + } else if (leftEdge ? 
(pad < mPadMean) : (pad > mPadMean)) { + mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak } - mPadMean = (correct) ? pad : mPadMean; } + + cn.qTot = CAMath::Float2UIntRn(mQtot); + if (cn.qTot <= param.rec.tpc.cfQTotCutoff) { + return false; + } + cn.qMax = q; + if (cn.qMax <= param.rec.tpc.cfQMaxCutoff) { + return false; + } + if (mTimeMean < param.rec.tpc.clustersShiftTimebinsClusterizer) { + return false; + } + if (q <= param.rec.tpc.cfQMaxCutoffSingleTime && mTimeSigma == 0) { + return false; + } + if (q <= param.rec.tpc.cfQMaxCutoffSinglePad && mPadSigma == 0) { + return false; + } + + bool wasSplitInTime = mSplitInTime >= param.rec.tpc.cfMinSplitNum; + bool wasSplitInPad = mSplitInPad >= param.rec.tpc.cfMinSplitNum; + bool isSingleCluster = (mPadSigma == 0) || (mTimeSigma == 0); + + uint8_t flags = 0; + flags |= (isEdgeCluster) ? tpc::ClusterNative::flagEdge : 0; + flags |= (wasSplitInTime) ? tpc::ClusterNative::flagSplitTime : 0; + flags |= (wasSplitInPad) ? tpc::ClusterNative::flagSplitPad : 0; + flags |= (isSingleCluster) ? 
tpc::ClusterNative::flagSingle : 0; + + cn.setTimeFlags(mTimeMean - param.rec.tpc.clustersShiftTimebinsClusterizer, flags); + cn.setPad(mPadMean); + cn.setSigmaTime(mTimeSigma); + cn.setSigmaPad(mPadSigma); + + return true; } diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index c409a6cced3a5..73f7cb439775a 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -17,6 +17,7 @@ #include "clusterFinderDefs.h" #include "PackedCharge.h" +#include "Array2D.h" namespace o2 { @@ -40,8 +41,7 @@ class ClusterAccumulator GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2); GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2); - GPUd() void finalize(const ChargePos&, tpccf::Charge, tpccf::TPCTime, const GPUTPCGeometry&, tpccf::Charge*); - GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&) const; + GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&, tpccf::TPCTime, const Array2D&); private: float mQtot = 0; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index f28e80aa08201..407deb6a588d0 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -58,8 +58,6 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; Charge charge = chargeMap[pos].unpack(); - Charge padBoundaryCharges[2] = {chargeMap[pos.delta({-1, 0})].unpack(), chargeMap[pos.delta({1, 0})].unpack()}; - ClusterAccumulator pc; CPU_ONLY(labelAcc->collect(pos, charge)); @@ -82,10 +80,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t } return; } - pc.finalize(pos, charge, 
fragment.start, clusterer.Param().tpcGeometry, padBoundaryCharges); - tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param()); + bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap); if (rejectCluster) { if (clusterPosInRow) { From dffee581d1402171d2088cb1c5d4d5ad76d7bace Mon Sep 17 00:00:00 2001 From: iravasen Date: Thu, 13 Mar 2025 11:20:27 +0100 Subject: [PATCH 0043/1764] Refine calculations for rise time and ToT (#14039) --- .../workflow/src/ThresholdCalibratorSpec.cxx | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx index 075a4869210e1..b651bfdedf3aa 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx @@ -1001,8 +1001,8 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) this->mFitType = NO_FIT; this->mMin = 0; this->mMax = 400; // strobe delay goes from 0 to 400 (included) in steps of 4 - this->mStep = 4; - this->mStrobeWindow = 5; // it's 4 but it corresponds to 4+1 (as from alpide manual) + this->mStep = 1; + this->mStrobeWindow = 1; // it's 0 but it corresponds to 0+1 (as from alpide manual) this->N_RANGE = (mMax - mMin) / mStep + 1; this->mCheckExactRow = true; } else if (runtype == TOT_CALIBRATION_1_ROW) { @@ -1013,7 +1013,7 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) this->mMin = 0; this->mMax = 2000; // strobe delay goes from 0 to 2000 in steps of 10 this->mStep = 10; - this->mStrobeWindow = 2; // it's 1 but it corresponds to 1+1 (as from alpide manual) + this->mStrobeWindow = 10; // it's 9 but it corresponds to 9+1 (as from alpide manual) this->N_RANGE = (mMax - mMin) / mStep + 1; this->mMin2 = 0; // charge min this->mMax2 = 170; // charge max @@ -1028,7 
+1028,7 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) this->mMin = 300; this->mMax = 1100; // strobe delay goes from 300 to 1100 (included) in steps of 10 this->mStep = 10; - this->mStrobeWindow = 2; // it's 1 but it corresponds to 1+1 (as from alpide manual) + this->mStrobeWindow = 10; // it's 9 but it corresponds to 9+1 (as from alpide manual) this->N_RANGE = (mMax - mMin) / mStep + 1; this->mMin2 = 30; // charge min this->mMax2 = 60; // charge max @@ -1116,39 +1116,39 @@ std::vector ITSThresholdCalibrator::calculatePulseParams(const short int& for (auto itrow = mPixelHits[chipID].begin(); itrow != mPixelHits[chipID].end(); itrow++) { // loop over the chip rows short int row = itrow->first; - for (short int col_i = 0; col_i < this->N_COL; col_i++) { // loop over the pixels on the row - for (short int sdel_i = 0; sdel_i < this->N_RANGE; sdel_i++) { // loop over the strobe delays - if (mPixelHits[chipID][row][col_i][0][sdel_i] > 0 && mPixelHits[chipID][row][col_i][0][sdel_i] < nInj && rt_mindel < 0) { // from left, the last bin with 0 hits or the first with some hits - rt_mindel = sdel_i > 0 ? 
((sdel_i - 1) * mStep) + 1 : (sdel_i * mStep) + 1; // + 1 because if delay = n, we get n+1 in reality (ALPIDE feature) + for (short int col_i = 0; col_i < this->N_COL; col_i++) { // loop over the pixels on the row + for (short int sdel_i = 0; sdel_i < this->N_RANGE; sdel_i++) { // loop over the strobe delays + if (mPixelHits[chipID][row][col_i][0][sdel_i] > 0.1 * nInj && mPixelHits[chipID][row][col_i][0][sdel_i] < nInj && rt_mindel < 0) { // from left, first bin with 10% hits and 90% hits + rt_mindel = (sdel_i * mStep) + 1; // + 1 because if delay = n, we get n+1 in reality (ALPIDE feature) } - if (mPixelHits[chipID][row][col_i][0][sdel_i] == nInj) { + if (mPixelHits[chipID][row][col_i][0][sdel_i] >= 0.9 * nInj) { // for Rt max take the 90% point rt_maxdel = (sdel_i * mStep) + 1; + break; + } + } + for (short int sdel_i = 0; sdel_i < N_RANGE; sdel_i++) { + if (mPixelHits[chipID][row][col_i][0][sdel_i] >= 0.5 * nInj) { // for ToT take the 50% point tot_mindel = (sdel_i * mStep) + 1; break; } } - for (short int sdel_i = N_RANGE - 1; sdel_i >= 0; sdel_i--) { // from right, the first bin with nInj hits - if (mPixelHits[chipID][row][col_i][0][sdel_i] == nInj) { + for (short int sdel_i = N_RANGE - 1; sdel_i >= 0; sdel_i--) { // from right, the first bin with 50% nInj hits + if (mPixelHits[chipID][row][col_i][0][sdel_i] >= 0.5 * nInj) { tot_maxdel = (sdel_i * mStep) + 1; break; } } if (tot_maxdel > tot_mindel && tot_mindel >= 0 && tot_maxdel >= 0) { - sumTot += tot_maxdel - tot_mindel - (int)(mStrobeWindow / 2); - sumSqTot += (tot_maxdel - tot_mindel - (int)(mStrobeWindow / 2)) * (tot_maxdel - tot_mindel - (int)(mStrobeWindow / 2)); + sumTot += tot_maxdel - tot_mindel - mStrobeWindow; + sumSqTot += (tot_maxdel - tot_mindel - mStrobeWindow) * (tot_maxdel - tot_mindel - mStrobeWindow); countTot++; } - if (rt_maxdel > rt_mindel && rt_maxdel > 0) { - if (rt_mindel < 0) { - sumRt += mStep + (int)(mStrobeWindow / 2); // resolution -> in case the rise is "instantaneous" - 
sumSqRt += (mStep + (int)(mStrobeWindow / 2)) * (mStep + (int)(mStrobeWindow / 2)); - } else { - sumRt += rt_maxdel - rt_mindel + (int)(mStrobeWindow / 2); - sumSqRt += (rt_maxdel - rt_mindel + (int)(mStrobeWindow / 2)) * (rt_maxdel - rt_mindel + (int)(mStrobeWindow / 2)); - } + if (rt_maxdel > rt_mindel && rt_maxdel > 0 && rt_mindel > 0) { + sumRt += rt_maxdel - rt_mindel + mStrobeWindow; + sumSqRt += (rt_maxdel - rt_mindel + mStrobeWindow) * (rt_maxdel - rt_mindel + mStrobeWindow); countRt++; } @@ -1232,8 +1232,8 @@ std::vector ITSThresholdCalibrator::calculatePulseParams2D(const short in } if (maxPl > tot_mindel && tot_mindel < 1e7 && maxPl >= 0) { // ToT - sumTot += maxPl - tot_mindel - (int)(mStrobeWindow / 2); - sumSqTot += (maxPl - tot_mindel - (int)(mStrobeWindow / 2)) * (maxPl - tot_mindel - (int)(mStrobeWindow / 2)); + sumTot += maxPl - tot_mindel - mStrobeWindow; + sumSqTot += (maxPl - tot_mindel - mStrobeWindow) * (maxPl - tot_mindel - mStrobeWindow); countTot++; } From 46380fc7fbd16567c6acae29dc991005a5b9b974 Mon Sep 17 00:00:00 2001 From: Chunzheng Wang <83008337+ChunzhengLab@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:21:59 +0100 Subject: [PATCH 0044/1764] AlpideResponse adjust for APTS (#13929) Signed-off-by: Chunzheng Wang --- .../AlpideResponseData/AlpideResponse.cxx | 66 ++++++++++++++----- .../ITSMFTSimulation/AlpideSimResponse.h | 4 +- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/Detectors/ITSMFT/common/data/AlpideResponseData/AlpideResponse.cxx b/Detectors/ITSMFT/common/data/AlpideResponseData/AlpideResponse.cxx index f4c71d85df24e..86ffb24b239ed 100644 --- a/Detectors/ITSMFT/common/data/AlpideResponseData/AlpideResponse.cxx +++ b/Detectors/ITSMFT/common/data/AlpideResponseData/AlpideResponse.cxx @@ -14,55 +14,87 @@ #include "ITSMFTSimulation/AlpideSimResponse.h" #include #include +#include #include #include #include #include #include -void alpideResponse(const std::string& inpath = "./", - const std::string& outpath 
= "./", - const std::string& response_file = "AlpideResponseData.root") +void alpideResponse(const std::string& inpath, const std::string& outpath, const std::string& chip_name) { + // Check input path validity + if (gSystem->AccessPathName(inpath.c_str())) { + throw std::invalid_argument("Input path does not exist or is inaccessible: " + inpath); + } + + // Check output path validity + if (gSystem->AccessPathName(outpath.c_str(), kWritePermission)) { + throw std::invalid_argument("Output path is not writable: " + outpath); + } o2::itsmft::AlpideSimResponse resp0, resp1; - resp0.initData(0, inpath.data()); - resp1.initData(1, inpath.data()); + if (chip_name == "Alpide") { + resp0.initData(0, inpath.c_str()); + resp1.initData(1, inpath.c_str()); + } else if (chip_name == "APTS") { + resp1.setColMax(1.5e-4); + resp1.setRowMax(1.5e-4); + resp1.initData(1, inpath.c_str()); + } else { + throw std::invalid_argument("Unknown chip name: " + chip_name); + } - auto file = TFile::Open((outpath + response_file).data(), "recreate"); - file->WriteObjectAny(&resp0, "o2::itsmft::AlpideSimResponse", "response0"); + std::string output_file = outpath + "/" + chip_name + "ResponseData.root"; + auto file = TFile::Open(output_file.c_str(), "recreate"); + + if (!file || file->IsZombie()) { + throw std::runtime_error("Failed to create output file: " + output_file); + } else if (chip_name == "Alpide") { + file->WriteObjectAny(&resp0, "o2::itsmft::AlpideSimResponse", "response0"); + } file->WriteObjectAny(&resp1, "o2::itsmft::AlpideSimResponse", "response1"); file->Close(); + delete file; } int main(int argc, const char* argv[]) { namespace bpo = boost::program_options; bpo::variables_map vm; - bpo::options_description options("Alpide reponse generator options"); - options.add_options()( - "inputdir,i", bpo::value()->default_value("./"), "Path where Vbb-0.0V and Vbb-3.0V are located.")( - "outputdir,o", bpo::value()->default_value("./"), "Path where to store the output.")( - "name,n", 
bpo::value()->default_value("AlpideResponseData.root"), "Output file name."); + bpo::options_description options("Alpide response generator options"); + options.add_options()("inputdir,i", bpo::value()->default_value("./"), "Path where Vbb-0.0V and Vbb-3.0V are located.")("outputdir,o", bpo::value()->default_value("./"), "Path where to store the output.")("chip,c", bpo::value()->default_value("Alpide"), "Chip name (Alpide or APTS)."); try { bpo::store(parse_command_line(argc, argv, options), vm); + if (vm.count("help")) { std::cout << options << std::endl; - return 1; + return 0; } + bpo::notify(vm); } catch (const bpo::error& e) { std::cerr << e.what() << "\n\n"; std::cerr << "Error parsing command line arguments. Available options:\n"; - std::cerr << options << std::endl; return 2; } - std::cout << "Generating " << vm["inputdir"].as() + vm["name"].as() << std::endl; - alpideResponse(vm["inputdir"].as(), vm["outputdir"].as(), vm["name"].as()); + try { + std::cout << "Generating response for chip: " << vm["chip"].as() << std::endl; + std::cout << "Input directory: " << vm["inputdir"].as() << std::endl; + std::cout << "Output directory: " << vm["outputdir"].as() << std::endl; + + alpideResponse(vm["inputdir"].as(), + vm["outputdir"].as(), + vm["chip"].as()); + std::cout << "Response file generated successfully." << std::endl; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } return 0; -} \ No newline at end of file +} diff --git a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h index 0462115d3bfc6..92656a16257a1 100644 --- a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h +++ b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h @@ -132,6 +132,8 @@ class AlpideSimResponse float getStepCol() const { return mStepInvCol ? 1. 
/ mStepInvCol : 0.f; } float getStepRow() const { return mStepInvRow ? 1. / mStepInvRow : 0.f; } float getStepDepth() const { return mStepInvDpt ? 1. / mStepInvDpt : 0.f; } + void setColMax(float v) noexcept { mColMax = v; } + void setRowMax(float v) noexcept { mRowMax = v; } void setDataPath(const std::string pth) { mDataPath = pth; } void setGridColName(const std::string nm) { mGridColName = nm; } void setGridRowName(const std::string nm) { mGridRowName = nm; } @@ -142,7 +144,7 @@ class AlpideSimResponse const std::string& getColRowDataFmt() const { return mColRowDataFmt; } void print() const; - ClassDefNV(AlpideSimResponse, 1); + ClassDefNV(AlpideSimResponse, 2); }; //----------------------------------------------------- From c0831adaeecc26ceff98d7f1aae8b2e8b3c46741 Mon Sep 17 00:00:00 2001 From: Cas van Veen - They/them <96796377+Cas1997@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:42:23 +0100 Subject: [PATCH 0045/1764] ALICE3: TRK now hosts IRIS tracker which is divided into petal cases and made retractable (#13953) * ALICE3: TRK now hosts IRIS tracker which is divided into petal cases and made retractable * Fixed hits in sensitive volumes + fixed CMakeLists.txt * Applied clang * Uncommented services * Final bug fix * Fixed placement of the beam pipe from earlier PR * Applied clang --------- Co-authored-by: Matteo Concas --- .../ALICE3/FT3/simulation/src/Detector.cxx | 17 +- .../include/Alice3DetectorsPassive/Pipe.h | 12 +- .../Upgrades/ALICE3/Passive/src/Pipe.cxx | 144 ++----------- .../TRK/base/include/TRKBase/TRKBaseParam.h | 2 + .../ALICE3/TRK/simulation/CMakeLists.txt | 8 +- .../include/TRKSimulation/Detector.h | 7 +- .../include/TRKSimulation/TRKPetalCase.h | 93 ++++++++ .../include/TRKSimulation/TRKPetalDisk.h | 64 ++++++ .../include/TRKSimulation/TRKPetalLayer.h | 61 ++++++ .../include/TRKSimulation/TRKServices.h | 12 +- .../ALICE3/TRK/simulation/src/Detector.cxx | 108 +++++++--- .../TRK/simulation/src/TRKPetalCase.cxx | 202 ++++++++++++++++++ 
.../TRK/simulation/src/TRKPetalDisk.cxx | 94 ++++++++ .../TRK/simulation/src/TRKPetalLayer.cxx | 79 +++++++ .../ALICE3/TRK/simulation/src/TRKServices.cxx | 96 ++++----- .../TRK/simulation/src/TRKSimulationLinkDef.h | 3 + macro/build_geometry.C | 2 +- 17 files changed, 764 insertions(+), 240 deletions(-) create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalCase.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalDisk.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalLayer.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalCase.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalDisk.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalLayer.cxx diff --git a/Detectors/Upgrades/ALICE3/FT3/simulation/src/Detector.cxx b/Detectors/Upgrades/ALICE3/FT3/simulation/src/Detector.cxx index 296bec8aa8922..ce132fdb33cd3 100644 --- a/Detectors/Upgrades/ALICE3/FT3/simulation/src/Detector.cxx +++ b/Detectors/Upgrades/ALICE3/FT3/simulation/src/Detector.cxx @@ -184,8 +184,8 @@ void Detector::buildBasicFT3(const FT3BaseParam& param) //_________________________________________________________________________________________________ void Detector::buildFT3V1() { - //Build FT3 detector according to - //https://indico.cern.ch/event/992488/contributions/4174473/attachments/2168881/3661331/tracker_parameters_werner_jan_11_2021.pdf + // Build FT3 detector according to + // https://indico.cern.ch/event/992488/contributions/4174473/attachments/2168881/3661331/tracker_parameters_werner_jan_11_2021.pdf LOG(info) << "Building FT3 Detector: V1"; @@ -284,17 +284,15 @@ void Detector::buildFT3NewVacuumVessel() // to adhere to the changes that were presented at the ALICE 3 Upgrade days in March 2024 // Inner radius at C-side to 7 cm // Inner radius at A-side stays at 5 cm + // 06.02.2025 update: IRIS layers are now 
in TRK LOG(info) << "Building FT3 Detector: After Upgrade Days March 2024 version"; - mNumberOfLayers = 12; + mNumberOfLayers = 9; float sensorThickness = 30.e-4; float layersx2X0 = 1.e-2; std::vector> layersConfigCSide{ - {26., .5, 2.5, 0.1f * layersx2X0}, // {z_layer, r_in, r_out, Layerx2X0} - {30., .5, 2.5, 0.1f * layersx2X0}, - {34., .5, 2.5, 0.1f * layersx2X0}, - {77., 7.0, 35., layersx2X0}, + {77., 7.0, 35., layersx2X0}, // {z_layer, r_in, r_out, Layerx2X0} {100., 7.0, 35., layersx2X0}, {122., 7.0, 35., layersx2X0}, {150., 7.0, 68.f, layersx2X0}, @@ -305,10 +303,7 @@ void Detector::buildFT3NewVacuumVessel() {350., 7.0, 68.f, layersx2X0}}; std::vector> layersConfigASide{ - {26., .5, 2.5, 0.1f * layersx2X0}, // {z_layer, r_in, r_out, Layerx2X0} - {30., .5, 2.5, 0.1f * layersx2X0}, - {34., .5, 2.5, 0.1f * layersx2X0}, - {77., 5.0, 35., layersx2X0}, + {77., 5.0, 35., layersx2X0}, // {z_layer, r_in, r_out, Layerx2X0} {100., 5.0, 35., layersx2X0}, {122., 5.0, 35., layersx2X0}, {150., 5.0, 68.f, layersx2X0}, diff --git a/Detectors/Upgrades/ALICE3/Passive/include/Alice3DetectorsPassive/Pipe.h b/Detectors/Upgrades/ALICE3/Passive/include/Alice3DetectorsPassive/Pipe.h index 1d9858e2dfec3..b13d9ab68780a 100644 --- a/Detectors/Upgrades/ALICE3/Passive/include/Alice3DetectorsPassive/Pipe.h +++ b/Detectors/Upgrades/ALICE3/Passive/include/Alice3DetectorsPassive/Pipe.h @@ -26,8 +26,6 @@ class Alice3Pipe : public Alice3PassiveBase ~Alice3Pipe() override; Alice3Pipe(const char* name, const char* title = "Alice 3 Pipe", - const bool isTRKActivated = false, - const bool isFT3Activated = false, const float pipeRIn = 0.f, const float pipeThickness = 0.f, const float a3ipLength = 0.f, @@ -50,9 +48,6 @@ class Alice3Pipe : public Alice3PassiveBase float getVacuumVesselWidth() const { return mVacuumVesselThick; } float getVacuumVesselLength() const { return mVacuumVesselASideLength; } - bool IsTRKActivated() const { return mIsTRKActivated; } - bool IsFT3Activated() const { return 
mIsFT3Activated; } - private: void createMaterials(); Alice3Pipe(const Alice3Pipe& orig) = default; @@ -62,13 +57,10 @@ class Alice3Pipe : public Alice3PassiveBase float mPipeThick = 0.; // inner beam pipe section thickness float mA3IPLength = 0.; // Length of A3IP - float mVacuumVesselRIn = 0.; // inner diameter of the vacuum vessel - float mVacuumVesselThick = 0.; // outer beam pipe section thickness + float mVacuumVesselRIn = 0.; // inner diameter of the vacuum vessel + float mVacuumVesselThick = 0.; // outer beam pipe section thickness float mVacuumVesselASideLength = 0.; // Length of the A Side of the vacuum vessel around the IP - bool mIsTRKActivated = true; // If TRK is not active don't create TRK layers allocations in the vacuum volume - bool mIsFT3Activated = true; - ClassDefOverride(Alice3Pipe, 1); }; } // namespace passive diff --git a/Detectors/Upgrades/ALICE3/Passive/src/Pipe.cxx b/Detectors/Upgrades/ALICE3/Passive/src/Pipe.cxx index 57f30241bd4ff..7dfd26a79b38d 100644 --- a/Detectors/Upgrades/ALICE3/Passive/src/Pipe.cxx +++ b/Detectors/Upgrades/ALICE3/Passive/src/Pipe.cxx @@ -12,8 +12,8 @@ #include "Alice3DetectorsPassive/Pipe.h" #include #include -#include -#include +#include "TGeoTube.h" +#include "TVirtualMC.h" #include "TGeoManager.h" // for TGeoManager, gGeoManager #include "TGeoMaterial.h" // for TGeoMaterial #include "TGeoMedium.h" // for TGeoMedium @@ -31,8 +31,6 @@ using namespace o2::passive; Alice3Pipe::Alice3Pipe() : Alice3PassiveBase{"Alice3PIPE", ""} {} Alice3Pipe::Alice3Pipe(const char* name, const char* title, - bool isTRKActivated, - bool isFT3Activated, float pipeRIn, float pipeThickness, float a3ipLength, @@ -40,8 +38,6 @@ Alice3Pipe::Alice3Pipe(const char* name, float vacuumVesselThickness, float vacuumVesselASideLength) : Alice3PassiveBase{name, title}, - mIsTRKActivated{isTRKActivated}, - mIsFT3Activated{isFT3Activated}, mPipeRIn{pipeRIn}, mPipeThick{pipeThickness}, mA3IPLength{a3ipLength}, @@ -85,7 +81,6 @@ void 
Alice3Pipe::ConstructGeometry() auto& matmgr = o2::base::MaterialManager::Instance(); const TGeoMedium* kMedBe = matmgr.getTGeoMedium("ALICE3_PIPE_BE"); - const TGeoMedium* kMedVac = matmgr.getTGeoMedium("ALICE3_PIPE_VACUUM"); // Top volume TGeoVolume* top = gGeoManager->GetVolume("cave"); @@ -96,96 +91,11 @@ void Alice3Pipe::ConstructGeometry() // We split the naming of the parts if the beam pipe for ALICE 3 into parts // - pipe A Side - // - vacuum vessel (which hosts the primary vacuum and covers all C Side as well) - // - iris vacuum vessel (which hosts the secondary vacuum) + // - pipe C Side (which hosts the primary vacuum vessel and covers all C Side as well) // A3IP update - // Vacuum - Double_t pipeASideLength = mA3IPLength / 2. - mVacuumVesselThick - mVacuumVesselASideLength; - Double_t pipeCSideLength = mA3IPLength / 2. + mVacuumVesselASideLength; - TGeoTube* vacuumBasePipe = new TGeoTube("PIPEVACUUM_BASEsh", 0., mPipeRIn, mA3IPLength / 2.); - TGeoTube* vacuumBaseVacuumVessel = new TGeoTube("VACUUM_VESSELVACUUM_BASEsh", mPipeRIn, mVacuumVesselRIn, pipeCSideLength / 2.); - - TGeoTranslation* posPipeCSide = new TGeoTranslation("PIPE_CSIDE_POSITION", 0, 0, mVacuumVesselASideLength - pipeCSideLength / 2.); - posPipeCSide->RegisterYourself(); - // Excavate volumes from the vacuum such that there is place for the TRK barrel layers and FT3 disc layers of the IRIS tracker - // And the other passive shapes: coldplate, iris tracker vacuum vessel - TGeoCompositeShape* vacuumComposite; - TGeoVolume* vacuumVolume; - TString compositeFormula{"PIPEVACUUM_BASEsh+VACUUM_VESSELVACUUM_BASEsh:PIPE_CSIDE_POSITION"}; - TString subtractorsFormula; - - if (!mIsTRKActivated) { - std::vector trkLayerShapes; - - std::vector> layersQuotas = {std::array{0.5f, 50.f, 100.e-4}, // TODO: Set layers dynamically. 
{radius, zLen, thickness} - std::array{1.2f, 50.f, 100.e-4}, - std::array{2.5f, 50.f, 100.e-4}}; - - for (auto iLayer{0}; iLayer < layersQuotas.size(); ++iLayer) { // Create TRK layers shapes - auto& layerData = layersQuotas[iLayer]; - trkLayerShapes.emplace_back(new TGeoTube(Form("TRKLAYER_%dsh", iLayer), layerData[0], layerData[0] + layerData[2], layerData[1] / 2.)); - if (iLayer != 0) { - subtractorsFormula += "+"; - } - subtractorsFormula += Form("TRKLAYER_%dsh", iLayer); - } - - // IRIS vacuum vessel and coldplate dimensions - float coldplateRIn = 2.6f; // cm - float coldplateThick = 150.e-3; // cm - float coldplateLength = 50.f; // cm - float irisVacuumVesselInnerRIn = 0.48f; // cm - float irisVacuumVesselOuterRIn = coldplateRIn + coldplateThick; - float irisVacuumVesselLength = 70.f; // cm - float irisVacuumVesselThick = 150.e-4; // cm - - // Excavate vacuum for hosting cold plate and IRIS tracker - TGeoTube* coldPlate = new TGeoTube("TRK_COLDPLATEsh", coldplateRIn, coldplateRIn + coldplateThick, coldplateLength / 2.); - subtractorsFormula += "+TRK_COLDPLATEsh"; - - TGeoTube* irisVacuumVesselInner = new TGeoTube("TRK_IRISVACUUMVESSELINNERsh", irisVacuumVesselInnerRIn, irisVacuumVesselInnerRIn + irisVacuumVesselThick, irisVacuumVesselLength / 2.); - subtractorsFormula += "+TRK_IRISVACUUMVESSELINNERsh"; - - TGeoTube* irisVacuumVesselOuter = new TGeoTube("TRK_IRISVACUUMVESSELOUTERsh", irisVacuumVesselOuterRIn, irisVacuumVesselOuterRIn + irisVacuumVesselThick, irisVacuumVesselLength / 2.); - subtractorsFormula += "+TRK_IRISVACUUMVESSELOUTERsh"; - - TGeoTube* irisVacuumVesselWall = new TGeoTube("TRK_IRISVACUUMVESSELWALLsh", irisVacuumVesselInnerRIn, irisVacuumVesselOuterRIn + irisVacuumVesselThick, irisVacuumVesselThick / 2.); - TGeoTranslation* posIrisVacVWallNegZSide = new TGeoTranslation("IRISWALLNEGZ", 0., 0., -irisVacuumVesselLength / 2. 
- irisVacuumVesselThick / 2.); - posIrisVacVWallNegZSide->RegisterYourself(); - subtractorsFormula += "+TRK_IRISVACUUMVESSELWALLsh:IRISWALLNEGZ"; - - TGeoTranslation* posIrisVacVWallPosZSide = new TGeoTranslation("IRISWALLPOSZ", 0., 0., irisVacuumVesselLength / 2. + irisVacuumVesselThick / 2.); - posIrisVacVWallPosZSide->RegisterYourself(); - subtractorsFormula += "+TRK_IRISVACUUMVESSELWALLsh:IRISWALLPOSZ"; - } - - if (!mIsFT3Activated) { - std::vector ft3DiscShapes; - std::vector ft3DiscPositions; - - std::vector> discsQuotas = {std::array{0.5f, 2.5f, 100.e-4, 26.}, // TODO: Set discs dynamically. {rIn, rOut, thickness, zpos} - std::array{0.5f, 2.5f, 100.e-4, 30.}, - std::array{0.5f, 2.5f, 100.e-4, 34.}, - std::array{0.5f, 2.5f, 100.e-4, -26.}, - std::array{0.5f, 2.5f, 100.e-4, -30.}, - std::array{0.5f, 2.5f, 100.e-4, -34.}}; - TString tempSubtractorsFormula = ""; - if (!mIsTRKActivated) { - tempSubtractorsFormula = "+"; - } - for (auto iDisc{0}; iDisc < discsQuotas.size(); ++iDisc) { - auto& discData = discsQuotas[iDisc]; - ft3DiscShapes.emplace_back(new TGeoTube(Form("FT3DISC_%dsh", iDisc), discData[0], discData[1], discData[2] / 2.)); - ft3DiscPositions.emplace_back(new TGeoTranslation(Form("t%d", iDisc), 0., 0., discData[3])); - ft3DiscPositions[iDisc]->RegisterYourself(); - if (iDisc != 0) { - tempSubtractorsFormula += "+"; - } - tempSubtractorsFormula += Form("FT3DISC_%dsh:t%d", iDisc, iDisc); - } - subtractorsFormula += tempSubtractorsFormula; - } + Double_t pipeASideLength = mA3IPLength / 2. - mVacuumVesselThick - mVacuumVesselASideLength / 2.; + Double_t pipeCSideLength = mA3IPLength / 2. 
+ mVacuumVesselASideLength / 2.; // Pipe tubes TGeoTube* pipeASide = new TGeoTube("PIPE_Ash", mPipeRIn, mPipeRIn + mPipeThick, pipeASideLength / 2.); @@ -193,37 +103,25 @@ void Alice3Pipe::ConstructGeometry() TGeoTube* vacuumVesselWall = new TGeoTube("VACUUM_VESSEL_WALLsh", mPipeRIn, mVacuumVesselRIn + mVacuumVesselThick, mVacuumVesselThick / 2.); // Pipe and vacuum vessel positions - TGeoTranslation* posVacuumVesselWall = new TGeoTranslation("WALL_POSITION", 0, 0, mVacuumVesselASideLength + mVacuumVesselThick / 2.); - posVacuumVesselWall->RegisterYourself(); - TGeoTranslation* posPipeASide = new TGeoTranslation("PIPE_ASIDE_POSITION", 0, 0, mVacuumVesselASideLength + mVacuumVesselThick + pipeASideLength / 2.); + TGeoTranslation* posPipeASide = new TGeoTranslation("PIPE_ASIDE_POSITION", 0, 0, mVacuumVesselASideLength / 2. + mVacuumVesselThick + pipeASideLength / 2.); posPipeASide->RegisterYourself(); + TGeoTranslation* posPipeCSide = new TGeoTranslation("PIPE_CSIDE_POSITION", 0, 0, mVacuumVesselASideLength / 2. - pipeCSideLength / 2.); + posPipeCSide->RegisterYourself(); + TGeoTranslation* posVacuumVesselWall = new TGeoTranslation("WALL_POSITION", 0, 0, mVacuumVesselASideLength / 2. 
+ mVacuumVesselThick / 2.); + posVacuumVesselWall->RegisterYourself(); // Pipe composite shape and volume TString pipeCompositeFormula = - "VACUUM_VESSEL_WALLsh:WALL_POSITION" - "+PIPE_Ash:PIPE_ASIDE_POSITION" - "+PIPE_Csh:PIPE_CSIDE_POSITION"; - - if (subtractorsFormula.Length()) { - LOG(info) << "Subtractors formula before : " << subtractorsFormula; - subtractorsFormula = Form("-(%s)", subtractorsFormula.Data()); - LOG(info) << "Subtractors formula after: " << subtractorsFormula; - - vacuumComposite = new TGeoCompositeShape("VACUUM_BASEsh", (compositeFormula + subtractorsFormula).Data()); - vacuumVolume = new TGeoVolume("VACUUM_BASE", vacuumComposite, kMedVac); - } else { - vacuumComposite = new TGeoCompositeShape("VACUUM_BASEsh", compositeFormula.Data()); - vacuumVolume = new TGeoVolume("VACUUM_BASE", vacuumComposite, kMedVac); - } + "PIPE_Ash:PIPE_ASIDE_POSITION" + "+PIPE_Csh:PIPE_CSIDE_POSITION" + "+VACUUM_VESSEL_WALLsh:WALL_POSITION"; TGeoCompositeShape* pipeComposite = new TGeoCompositeShape("A3IPsh", pipeCompositeFormula); TGeoVolume* pipeVolume = new TGeoVolume("A3IP", pipeComposite, kMedBe); // Add everything to the barrel - barrel->AddNode(vacuumVolume, 1, new TGeoTranslation(0, 30.f, 0)); barrel->AddNode(pipeVolume, 1, new TGeoTranslation(0, 30.f, 0)); - vacuumVolume->SetLineColor(kGreen + 3); pipeVolume->SetLineColor(kGreen + 3); } @@ -236,15 +134,6 @@ void Alice3Pipe::createMaterials() float sxmgmx = 10.; o2::base::Detector::initFieldTrackingParams(isxfld, sxmgmx); - // - // Air - // - float aAir[4] = {12.0107, 14.0067, 15.9994, 39.948}; - float zAir[4] = {6., 7., 8., 18.}; - float wAir[4] = {0.000124, 0.755267, 0.231781, 0.012827}; - float dAir = 1.20479E-3; - float dAir1 = 1.20479E-11; - // **************** // Defines tracking media parameters. 
// @@ -258,13 +147,8 @@ void Alice3Pipe::createMaterials() auto& matmgr = o2::base::MaterialManager::Instance(); // Beryllium - matmgr.Material("ALICE3_PIPE", 5, "BERILLIUM$", 9.01, 4., 1.848, 35.3, 36.7); + matmgr.Material("ALICE3_PIPE", 5, "BERYLLIUM$", 9.01, 4., 1.848, 35.3, 36.7); matmgr.Medium("ALICE3_PIPE", 5, "BE", 5, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); - - // Vacuum - matmgr.Mixture("ALICE3_PIPE", 16, "VACUUM$ ", aAir, zAir, dAir1, 4, wAir); - - matmgr.Medium("ALICE3_PIPE", 16, "VACUUM", 16, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); } // ---------------------------------------------------------------------------- diff --git a/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h b/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h index 9ea4bd1072d91..63c95b1e6b2f6 100644 --- a/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h +++ b/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h @@ -29,6 +29,8 @@ enum eLayout { struct TRKBaseParam : public o2::conf::ConfigurableParamHelper { std::string configFile = ""; float serviceTubeX0 = 0.02f; // X0 Al2O3 + Bool_t irisOpen = false; + eLayout layoutML = kCylinder; // Type of segmentation for the middle layers eLayout layoutOL = kCylinder; // Type of segmentation for the outer layers diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt index 7706c0e10d778..c21b7b9aebbf6 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt @@ -13,6 +13,9 @@ o2_add_library(TRKSimulation SOURCES src/TRKLayer.cxx src/Detector.cxx src/TRKServices.cxx + src/TRKPetalCase.cxx + src/TRKPetalLayer.cxx + src/TRKPetalDisk.cxx PUBLIC_LINK_LIBRARIES O2::TRKBase O2::FT3Simulation O2::ITSMFTSimulation) @@ -20,4 +23,7 @@ o2_add_library(TRKSimulation o2_target_root_dictionary(TRKSimulation HEADERS 
include/TRKSimulation/Detector.h include/TRKSimulation/TRKLayer.h - include/TRKSimulation/TRKServices.h) \ No newline at end of file + include/TRKSimulation/TRKServices.h + include/TRKSimulation/TRKPetalCase.h + include/TRKSimulation/TRKPetalLayer.h + include/TRKSimulation/TRKPetalDisk.h) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Detector.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Detector.h index 5b777641dbe99..31f3da7a00bb4 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Detector.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Detector.h @@ -17,6 +17,7 @@ #include "TRKSimulation/TRKLayer.h" #include "TRKSimulation/TRKServices.h" +#include "TRKSimulation/TRKPetalCase.h" #include "TRKBase/GeometryTGeo.h" #include @@ -87,7 +88,11 @@ class Detector : public o2::base::DetImpl GeometryTGeo* mGeometryTGeo; //! std::vector* mHits; // ITSMFT ones for the moment std::vector mLayers; - TRKServices mServices; + TRKServices mServices; // Houses the services of the TRK, but not the Iris tracker + std::vector mPetalCases; // Houses the Iris tracker and its services. Created fully in the beam pipe + + std::vector mFirstOrLastLayers; // Names of the first or last layers + bool InsideFirstOrLastLayer(std::string layerName); void defineSensitiveVolumes(); diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalCase.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalCase.h new file mode 100644 index 0000000000000..cd45cc98fd177 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalCase.h @@ -0,0 +1,93 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_TRK_PETALCASE_H +#define ALICEO2_TRK_PETALCASE_H + +#include + +#include "TRKSimulation/TRKPetalLayer.h" +#include "TRKSimulation/TRKPetalDisk.h" +#include "TGeoCompositeShape.h" + +namespace o2 +{ +namespace trk +{ +class TRKPetalCase +{ + public: + TRKPetalCase() = default; + TRKPetalCase(Int_t number, TGeoVolume* motherVolume, Bool_t irisOpen); + ~TRKPetalCase() = default; + + // Sensitive volume list + std::vector mPetalLayers; + std::vector mPetalDisks; + + auto getPetalCaseName() { return mPetalCaseName; } + TString getFullName(); + + private: + void constructCase(TGeoVolume* motherVolume); + void constructColdPlate(TGeoVolume* motherVolume); + void constructDetectionPetals(TGeoVolume* motherVolume); + void addDetectionPetelsToFullComposite(); + + void addToPetalCaseComposite(TString shape) { mFullCompositeFormula += ("+" + shape); } + + Int_t mPetalCaseNumber; // Used to determine rotation and position. 0-3 + Bool_t mOpenState; // At injection energy, the iris tracker is in the open position. During stable beams, it is closed + + TString mPetalCaseName; + TString mFullCompositeFormula; // Used to excavate the petal and all its components from the vacuum + + // Center position of the petal case. 0,0,0 at stable beams (a.k.a. 
closed state) + Double_t mXPos, mYPos, mZPos; + + Double_t mWallThickness; // cm // Assume all the walls have the same thickness for now + Double_t mRIn; // cm + Double_t mROut; // cm + Double_t mRInOpenState; // cm + Double_t mPetalCaseLength; // cm + + Double_t mAngularCoverageAzimuthalWall; // Rad // Angular coverage of azimuthal part of wall (equivalent to that of the sensitive volumes) + Double_t mAngularCoverageRadialWall; // Rad // Angular coverage of radial part of wall + Double_t mToDeg; + + // Petal case parts -> In one composite shape + TGeoTubeSeg* mInnerAzimuthalWall; + TGeoTubeSeg* mOuterAzimuthalWall; + TGeoTubeSeg* mRadialWall; + TGeoTubeSeg* mForwardWall; + + TGeoRotation* mAzimuthalWallRot; + TGeoRotation* mRadialWall1Rot; + TGeoRotation* mRadialWall2Rot; + + TGeoCombiTrans* mAzimuthalWallCombiTrans; + TGeoCombiTrans* mRadialWall1CombiTrans; + TGeoCombiTrans* mRadialWall2CombiTrans; + TGeoCombiTrans* mForwardWall1CombiTrans; + TGeoCombiTrans* mForwardWall2CombiTrans; + + TGeoVolume* mPetalCaseVolume; + + // Cold plate + TGeoTubeSeg* mColdPlate; + TGeoVolume* mColdPlateVolume; + + ClassDef(TRKPetalCase, 1); +}; + +} // namespace trk +} // namespace o2 +#endif // ALICEO2_TRK_PETALCASE_H \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalDisk.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalDisk.h new file mode 100644 index 0000000000000..465f52eb8d41b --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalDisk.h @@ -0,0 +1,64 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file TRKPetalDisk.h +/// \brief Definition of the TRKPetalDisk class + +#ifndef ALICEO2_TRK_PETAL_DISK_H_ +#define ALICEO2_TRK_PETAL_DISK_H_ + +#include "TGeoManager.h" // for gGeoManager +#include "Rtypes.h" // for Double_t, Int_t, Bool_t, etc +#include // for LOG + +namespace o2 +{ +namespace trk +{ + +/// This class defines the Geometry for the TRK Disk TGeo. +class TRKPetalDisk +{ + public: + TRKPetalDisk() = default; + TRKPetalDisk(Int_t diskNumber, std::string diskName, Float_t z, Float_t rIn, Float_t rOut, Float_t angularCoverage, Float_t Diskx2X0); + ~TRKPetalDisk() = default; + + auto getInnerRadius() const { return mInnerRadius; } + auto getOuterRadius() const { return mOuterRadius; } + auto getThickness() const { return mChipThickness; } + auto getAngularCoverage() const { return mAngularCoverage; } + auto getZ() const { return mZ; } + auto getx2X0() const { return mx2X0; } + auto getName() const { return mDiskName; } + auto getSensorName() const { return mSensorName; } + + /// Creates the actual Disk and places inside its mother volume + /// \param motherVolume the TGeoVolume owing the volume structure + void createDisk(TGeoVolume* motherVolume, TGeoCombiTrans* combiTrans); + + private: + Int_t mDiskNumber = -1; ///< Current disk number + std::string mDiskName; ///< Current disk name + std::string mSensorName; + Double_t mInnerRadius; ///< Inner radius of this disk + Double_t mOuterRadius; ///< Outer radius of this disk + Double_t mAngularCoverage; + Double_t mZ; ///< Z position of the disk + Double_t mChipThickness; ///< Chip thickness + Double_t mx2X0; ///< Disk material budget x/X0 + + ClassDef(TRKPetalDisk, 1); +}; +} // namespace trk +} // namespace o2 + +#endif // ALICEO2_TRK_PETAL_DISK_H diff --git 
a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalLayer.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalLayer.h new file mode 100644 index 0000000000000..4e7a7735d51f0 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalLayer.h @@ -0,0 +1,61 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_TRK_PETAL_LAYER_H +#define ALICEO2_TRK_PETAL_LAYER_H + +#include "TGeoManager.h" +#include +#include "TGeoTube.h" + +#include "TRKBase/TRKBaseParam.h" + +namespace o2 +{ +namespace trk +{ +class TRKPetalLayer +{ + public: + TRKPetalLayer() = default; + TRKPetalLayer(Int_t layerNumber, std::string layerName, Float_t rIn, Float_t angularCoverage, Float_t zLength, Float_t layerX2X0); + ~TRKPetalLayer() = default; + + auto getInnerRadius() const { return mInnerRadius; } + auto getAngularCoverage() const { return mAngularCoverage; } + auto getZLength() { return mZ; } + auto getx2X0() const { return mX2X0; } + auto getChipThickness() const { return mChipThickness; } + auto getNumber() const { return mLayerNumber; } + auto getName() const { return mLayerName; } + auto getSensorName() const { return mSensorName; } + + void createLayer(TGeoVolume* motherVolume, TGeoCombiTrans* combiTrans); + + private: + Int_t mLayerNumber; + std::string mLayerName; + std::string mSensorName; + Float_t mInnerRadius; + Float_t mZ; + Float_t mX2X0; + Float_t mChipThickness; + Float_t mModuleWidth; // 
u.m. = cm + Float_t mAngularCoverage; // rad + + TGeoTubeSeg* mLayer; + + ClassDef(TRKPetalLayer, 1); +}; + +} // namespace trk +} // namespace o2 +#endif // ALICEO2_TRK_PETAL_LAYER_H \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKServices.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKServices.h index 4a12be8572ed5..8dd3968743024 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKServices.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKServices.h @@ -46,15 +46,18 @@ class TRKServices : public FairModule // =================================================== ---> createOuterServices public: TRKServices() = default; - TRKServices(float rMin, float zLength, float thickness); void createMaterials(); void createServices(TGeoVolume* motherVolume); - void createColdplate(TGeoVolume* motherVolume); void createMiddleServices(TGeoVolume* motherVolume); void createOuterDisksServices(TGeoVolume* motherVolume); void createOuterBarrelServices(TGeoVolume* motherVolume); + void createVacuumCompositeShape(); + void excavateFromVacuum(TString shapeToExcavate); + void registerVacuum(TGeoVolume* motherVolume); protected: + // Vacuum + TString mVacuumCompositeFormula; // Coldplate float mColdPlateRMin; // cm float mColdPlateZLength; // cm @@ -70,11 +73,6 @@ class TRKServices : public FairModule float mMiddleDiskThickness = 1.0; // cm std::vector mCableFanWeights = {0.5, 0.3, 0.2}; // relative weights of the fan layers - // IRIS vacuum vessel - float mRInIRISVacV; // cm - float mROutIRISVacV; // cm - float mZLengthIRISVacV; // cm - float mThicknessIRISVacV; // cm ClassDefOverride(TRKServices, 1); }; } // namespace trk diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx index b9015ce578caf..9b8ffc07b2d0e 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx 
+++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx @@ -80,17 +80,17 @@ void Detector::configDefault() mLayers.clear(); LOGP(warning, "Loading Scoping Document configuration for ALICE3 TRK"); - mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 0.5f, 50.f, 100.e-4); - mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 1.2f, 50.f, 100.e-4); - mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 2.5f, 50.f, 100.e-4); - mLayers.emplace_back(3, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(3)}, 3.78f, 124.f, 100.e-3); - mLayers.emplace_back(4, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(4)}, 7.f, 124.f, 100.e-3); - mLayers.emplace_back(5, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(5)}, 12.f, 124.f, 100.e-3); - mLayers.emplace_back(6, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(6)}, 20.f, 124.f, 100.e-3); - mLayers.emplace_back(7, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(7)}, 30.f, 124.f, 100.e-3); - mLayers.emplace_back(8, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(8)}, 45.f, 258.f, 100.e-3); - mLayers.emplace_back(9, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(9)}, 60.f, 258.f, 100.e-3); - mLayers.emplace_back(10, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(10)}, 80.f, 258.f, 100.e-3); + // mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 0.5f, 50.f, 100.e-4); + // mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 1.2f, 50.f, 100.e-4); + // mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 2.5f, 50.f, 100.e-4); + mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 3.78f, 124.f, 100.e-3); + 
mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 7.f, 124.f, 100.e-3); + mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 12.f, 124.f, 100.e-3); + mLayers.emplace_back(3, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(3)}, 20.f, 124.f, 100.e-3); + mLayers.emplace_back(4, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(4)}, 30.f, 124.f, 100.e-3); + mLayers.emplace_back(5, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(5)}, 45.f, 258.f, 100.e-3); + mLayers.emplace_back(6, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(6)}, 60.f, 258.f, 100.e-3); + mLayers.emplace_back(7, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(7)}, 80.f, 258.f, 100.e-3); } void Detector::buildTRKNewVacuumVessel() @@ -103,31 +103,32 @@ void Detector::buildTRKNewVacuumVessel() mLayers.clear(); LOGP(warning, "Loading \"After Upgrade Days March 2024\" configuration for ALICE3 TRK"); - mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 0.5f, 50.f, 100.e-4); - mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 1.2f, 50.f, 100.e-4); - mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 2.5f, 50.f, 100.e-4); - mLayers.emplace_back(3, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(3)}, 7.f, 124.f, 100.e-3); - mLayers.emplace_back(4, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(4)}, 9.f, 124.f, 100.e-3); - mLayers.emplace_back(5, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(5)}, 12.f, 124.f, 100.e-3); - mLayers.emplace_back(6, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(6)}, 20.f, 124.f, 100.e-3); - mLayers.emplace_back(7, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(7)}, 30.f, 124.f, 100.e-3); - mLayers.emplace_back(8, 
std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(8)}, 45.f, 258.f, 100.e-3); - mLayers.emplace_back(9, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(9)}, 60.f, 258.f, 100.e-3); - mLayers.emplace_back(10, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(10)}, 80.f, 258.f, 100.e-3); + // mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 0.5f, 50.f, 100.e-4); + // mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 1.2f, 50.f, 100.e-4); + // mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 2.5f, 50.f, 100.e-4); + mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 7.f, 124.f, 100.e-3); + LOGP(info, "TRKLayer created. Name: {}", std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}); + mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 9.f, 124.f, 100.e-3); + mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 12.f, 124.f, 100.e-3); + mLayers.emplace_back(3, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(3)}, 20.f, 124.f, 100.e-3); + mLayers.emplace_back(4, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(4)}, 30.f, 124.f, 100.e-3); + mLayers.emplace_back(5, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(5)}, 45.f, 258.f, 100.e-3); + mLayers.emplace_back(6, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(6)}, 60.f, 258.f, 100.e-3); + mLayers.emplace_back(7, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(7)}, 80.f, 258.f, 100.e-3); auto& trkPars = TRKBaseParam::Instance(); // Middle layers + mLayers[0].setLayout(trkPars.layoutML); + mLayers[1].setLayout(trkPars.layoutML); + mLayers[2].setLayout(trkPars.layoutML); mLayers[3].setLayout(trkPars.layoutML); - 
mLayers[4].setLayout(trkPars.layoutML); - mLayers[5].setLayout(trkPars.layoutML); - mLayers[6].setLayout(trkPars.layoutML); // Outer tracker + mLayers[4].setLayout(trkPars.layoutOL); + mLayers[5].setLayout(trkPars.layoutOL); + mLayers[6].setLayout(trkPars.layoutOL); mLayers[7].setLayout(trkPars.layoutOL); - mLayers[8].setLayout(trkPars.layoutOL); - mLayers[9].setLayout(trkPars.layoutOL); - mLayers[10].setLayout(trkPars.layoutOL); } void Detector::configFromFile(std::string fileName) @@ -173,7 +174,7 @@ void Detector::configToFile(std::string fileName) void Detector::configServices() { - mServices = TRKServices{2.6f, 50.f, 150.e-3}; + mServices = TRKServices(); } void Detector::createMaterials() @@ -237,6 +238,14 @@ void Detector::createGeometry() // Add service for inner tracker mServices.createServices(vTRK); + mPetalCases.clear(); + // Add petal cases (the sensitive layers inside the petal cases get constructed here too) + auto& trkPars = TRKBaseParam::Instance(); + for (Int_t petalCaseNumber = 0; petalCaseNumber < 4; ++petalCaseNumber) { + mPetalCases.emplace_back(petalCaseNumber, vTRK, trkPars.irisOpen); + mServices.excavateFromVacuum(mPetalCases[petalCaseNumber].getFullName()); + } + mServices.registerVacuum(vTRK); } void Detector::InitializeO2Detector() @@ -254,9 +263,35 @@ void Detector::defineSensitiveVolumes() TString volumeName; LOGP(info, "Adding TRK Sensitive Volumes"); + // Add petal case sensitive volumes + for (int petalCase = 0; petalCase < 4; ++petalCase) { + // Petal layers + for (int petalLayer = 0; petalLayer < mPetalCases[petalCase].mPetalLayers.size(); ++petalLayer) { + volumeName = mPetalCases[petalCase].mPetalLayers[petalLayer].getSensorName(); + if (petalLayer == 0) { + mFirstOrLastLayers.push_back(volumeName.Data()); + } + LOGP(info, "Trying {}", volumeName.Data()); + v = geoManager->GetVolume(volumeName.Data()); + LOGP(info, "Adding TRK Sensitive Volume {}", v->GetName()); + AddSensitiveVolume(v); + } + // Petal disks + for (int petalDisk 
= 0; petalDisk < mPetalCases[petalCase].mPetalDisks.size(); ++petalDisk) { + volumeName = mPetalCases[petalCase].mPetalDisks[petalDisk].getSensorName(); + LOGP(info, "Trying {}", volumeName.Data()); + v = geoManager->GetVolume(volumeName.Data()); + LOGP(info, "Adding TRK Sensitive Volume {}", v->GetName()); + AddSensitiveVolume(v); + } + } + // The names of the TRK sensitive volumes have the format: TRKLayer(0...mLayers.size()-1) for (int j{0}; j < mLayers.size(); j++) { volumeName = GeometryTGeo::getTRKSensorPattern() + TString::Itoa(j, 10); + if (j == mLayers.size() - 1) { + mFirstOrLastLayers.push_back(volumeName.Data()); + } LOGP(info, "Trying {}", volumeName.Data()); v = geoManager->GetVolume(volumeName.Data()); LOGP(info, "Adding TRK Sensitive Volume {}", v->GetName()); @@ -284,6 +319,18 @@ void Detector::Reset() } } +bool Detector::InsideFirstOrLastLayer(std::string layerName) +{ + bool inside = false; + for (auto& firstOrLastLayer : mFirstOrLastLayers) { + if (firstOrLastLayer == layerName) { + inside = true; + break; + } + } + return inside; +} + bool Detector::ProcessHits(FairVolume* vol) { // This method is called from the MC stepping @@ -296,7 +343,8 @@ bool Detector::ProcessHits(FairVolume* vol) // Is it needed to keep a track reference when the outer ITS volume is encountered? 
auto stack = (o2::data::Stack*)fMC->GetStack(); - if (fMC->IsTrackExiting() && (lay == 0 || lay == mLayers.size() - 1)) { + // if (fMC->IsTrackExiting() && (lay == 0 || lay == mLayers.size() - 1)) { + if (fMC->IsTrackExiting() && InsideFirstOrLastLayer(vol->GetName())) { // Keep the track refs for the innermost and outermost layers only o2::TrackReference tr(*fMC, GetDetId()); tr.setTrackID(stack->GetCurrentTrackNumber()); diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalCase.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalCase.cxx new file mode 100644 index 0000000000000..c729d7d1ec4dd --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalCase.cxx @@ -0,0 +1,202 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "TRKSimulation/TRKPetalCase.h" +#include "TRKBase/GeometryTGeo.h" +#include + +#include "Framework/Logger.h" + +#include "TGeoTube.h" +#include "TGeoMatrix.h" +#include "TGeoCompositeShape.h" +#include "TGeoVolume.h" +#include "TString.h" +#include "TMath.h" + +namespace o2 +{ +namespace trk +{ +TRKPetalCase::TRKPetalCase(Int_t number, TGeoVolume* motherVolume, Bool_t irisOpen) : mPetalCaseNumber(number), mOpenState(irisOpen) +{ + + mWallThickness = .15e-1; // cm // Assume all the walls have the same thickness for now. 
+ mRIn = 0.48; // cm + mROut = 3; // cm + mRInOpenState = 1.5; // cm + mPetalCaseLength = 70.; // cm + + // Calculate angular coverages of azimuthal part of wall (equivalent to that of the sensitive volumes) + mAngularCoverageAzimuthalWall = (0.25 * (2 * mRIn * TMath::Pi()) - 2 * mWallThickness) / mRIn; + mAngularCoverageRadialWall = mWallThickness / mRIn; + mToDeg = 180 / TMath::Pi(); + + // Calculate the center of the petal (x_c, y_c, z_c) based on whether it is open or not + mZPos = 0; + if (mOpenState) { + Double_t rHalfPetal = 0.5 * (mRIn + mROut); + Double_t rOpenStateCenter = TMath::Sqrt(rHalfPetal * rHalfPetal + mRInOpenState * mRInOpenState); + mXPos = rOpenStateCenter * TMath::Cos(0.25 * TMath::Pi() + (mPetalCaseNumber - 1) * 0.5 * TMath::Pi()); + mYPos = rOpenStateCenter * TMath::Sin(0.25 * TMath::Pi() + (mPetalCaseNumber - 1) * 0.5 * TMath::Pi()); + } else { + mXPos = 0.; + mYPos = 0.; + } + + // Make the petal case + constructCase(motherVolume); + // Make coldplate + constructColdPlate(motherVolume); + // Add the detection petals (quarter disks and barrel layers) + constructDetectionPetals(motherVolume); +} + +TString TRKPetalCase::getFullName() +{ + TString fullCompositeName = Form("PETALCASE%d_FULLCOMPOSITE", mPetalCaseNumber); + TGeoCompositeShape* fullCompositeShape = new TGeoCompositeShape(fullCompositeName, mFullCompositeFormula); + return fullCompositeName; +} + +void TRKPetalCase::constructCase(TGeoVolume* motherVolume) +{ + + // Petal case parts in TGeoTubeSeg + mInnerAzimuthalWall = new TGeoTubeSeg(Form("PETAL%d_INNER_AZIMUTHAL_WALL", mPetalCaseNumber), mRIn, mRIn + mWallThickness, mPetalCaseLength / 2., -0.5 * mAngularCoverageAzimuthalWall * mToDeg, 0.5 * mAngularCoverageAzimuthalWall * mToDeg); + mOuterAzimuthalWall = new TGeoTubeSeg(Form("PETAL%d_OUTER_AZIMUTHAL_WALL", mPetalCaseNumber), mROut, mROut + mWallThickness, mPetalCaseLength / 2., -0.5 * mAngularCoverageAzimuthalWall * mToDeg, 0.5 * mAngularCoverageAzimuthalWall * mToDeg); + 
mRadialWall = new TGeoTubeSeg(Form("PETAL%d_RADIAL_WALL", mPetalCaseNumber), mRIn, mROut + mWallThickness, mPetalCaseLength / 2., -0.5 * mAngularCoverageRadialWall * mToDeg, 0.5 * mAngularCoverageRadialWall * mToDeg); + mForwardWall = new TGeoTubeSeg(Form("PETAL%d_FORWARD_WALL", mPetalCaseNumber), mRIn, mROut + mWallThickness, mWallThickness / 2., -0.5 * (mAngularCoverageAzimuthalWall + 2 * mAngularCoverageRadialWall) * mToDeg, 0.5 * (mAngularCoverageAzimuthalWall + 2 * mAngularCoverageRadialWall) * mToDeg); + + // Rotate to correct section : 0-3 + mAzimuthalWallRot = new TGeoRotation((TString)Form("PETAL%d_AZIMUTHAL_WALL_ROT", mPetalCaseNumber), (mPetalCaseNumber * 0.5 * TMath::Pi() + 0.5 * mAngularCoverageAzimuthalWall + mAngularCoverageRadialWall) * mToDeg, 0., 0.); + mAzimuthalWallRot->RegisterYourself(); + mRadialWall1Rot = new TGeoRotation((TString)Form("PETAL%d_RADIAL_WALL1_ROT", mPetalCaseNumber), (mPetalCaseNumber * 0.5 * TMath::Pi() + 0.5 * mAngularCoverageRadialWall) * mToDeg, 0., 0.); + mRadialWall1Rot->RegisterYourself(); + mRadialWall2Rot = new TGeoRotation((TString)Form("PETAL%d_RADIAL_WALL2_ROT", mPetalCaseNumber), (mPetalCaseNumber * 0.5 * TMath::Pi() + mAngularCoverageAzimuthalWall + 1.5 * mAngularCoverageRadialWall) * mToDeg, 0., 0.); + mRadialWall2Rot->RegisterYourself(); + + // Place to correct position (open or closed) + mAzimuthalWallCombiTrans = new TGeoCombiTrans((TString)Form("PETAL%d_AZIMUTHAL_WALL_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, mZPos, mAzimuthalWallRot); + mAzimuthalWallCombiTrans->RegisterYourself(); + mRadialWall1CombiTrans = new TGeoCombiTrans((TString)Form("PETAL%d_RADIAL_WALL1_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, mZPos, mRadialWall1Rot); + mRadialWall1CombiTrans->RegisterYourself(); + mRadialWall2CombiTrans = new TGeoCombiTrans((TString)Form("PETAL%d_RADIAL_WALL2_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, mZPos, mRadialWall2Rot); + mRadialWall2CombiTrans->RegisterYourself(); + mForwardWall1CombiTrans = new 
TGeoCombiTrans((TString)Form("PETAL%d_FORWARD_WALL1_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, (mPetalCaseLength + mWallThickness) / 2., mAzimuthalWallRot); + mForwardWall1CombiTrans->RegisterYourself(); + mForwardWall2CombiTrans = new TGeoCombiTrans((TString)Form("PETAL%d_FORWARD_WALL2_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, -(mPetalCaseLength + mWallThickness) / 2., mAzimuthalWallRot); + mForwardWall2CombiTrans->RegisterYourself(); + + TString petalCaseCompositeFormula = (TString)Form("PETAL%d_INNER_AZIMUTHAL_WALL:PETAL%d_AZIMUTHAL_WALL_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_OUTER_AZIMUTHAL_WALL:PETAL%d_AZIMUTHAL_WALL_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_RADIAL_WALL:PETAL%d_RADIAL_WALL1_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_RADIAL_WALL:PETAL%d_RADIAL_WALL2_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_FORWARD_WALL:PETAL%d_FORWARD_WALL1_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_FORWARD_WALL:PETAL%d_FORWARD_WALL2_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber); + + TGeoCompositeShape* petalCaseComposite = new TGeoCompositeShape((TString)Form("PETALCASE%dsh", mPetalCaseNumber), petalCaseCompositeFormula); + mFullCompositeFormula = petalCaseComposite->GetName(); + auto& matmgr = o2::base::MaterialManager::Instance(); + const TGeoMedium* kMedBe = matmgr.getTGeoMedium("ALICE3_TRKSERVICES_BERYLLIUM"); + + mPetalCaseName = Form("PETALCASE%d", mPetalCaseNumber); + mPetalCaseVolume = new TGeoVolume(mPetalCaseName, petalCaseComposite, kMedBe); + mPetalCaseVolume->SetVisibility(1); + mPetalCaseVolume->SetLineColor(kGray); + + LOGP(info, "Creating IRIS Tracker vacuum petal case {}", mPetalCaseNumber); + LOGP(info, "Inserting {} in {} ", mPetalCaseVolume->GetName(), motherVolume->GetName()); + motherVolume->AddNode(mPetalCaseVolume, 1, nullptr); +} + +void 
TRKPetalCase::constructColdPlate(TGeoVolume* motherVolume) +{ + Double_t coldPlateRadius = 2.6; // cm + Double_t coldPlateThickness = 0.15; // cm + Double_t coldPlateLength = 50.; // cm + + mColdPlate = new TGeoTubeSeg((TString)Form("PETAL%d_COLDPLATE", mPetalCaseNumber), coldPlateRadius, coldPlateRadius + coldPlateThickness, coldPlateLength / 2., -0.5 * mAngularCoverageAzimuthalWall * mToDeg, 0.5 * mAngularCoverageAzimuthalWall * mToDeg); + auto& matmgr = o2::base::MaterialManager::Instance(); + const TGeoMedium* medCeramic = matmgr.getTGeoMedium("ALICE3_TRKSERVICES_CERAMIC"); + mColdPlateVolume = new TGeoVolume(Form("COLDPLATE%d", mPetalCaseNumber), mColdPlate, medCeramic); + + TString coldPlateCompositeFormula = mColdPlate->GetName(); + coldPlateCompositeFormula += ":"; + coldPlateCompositeFormula += mAzimuthalWallCombiTrans->GetName(); + addToPetalCaseComposite(coldPlateCompositeFormula); + + mColdPlateVolume->SetVisibility(1); + mColdPlateVolume->SetLineColor(kGray); + + LOGP(info, "Creating cold plate service"); + LOGP(info, "Inserting {} in {} ", mColdPlateVolume->GetName(), motherVolume->GetName()); + motherVolume->AddNode(mColdPlateVolume, 1, mAzimuthalWallCombiTrans); +} + +void TRKPetalCase::constructDetectionPetals(TGeoVolume* motherVolume) +{ + // Add petal layers + // layerNumber, layerName, rIn, angularCoverage, zLength, layerx2X0 + mPetalLayers.emplace_back(0, Form("%s_LAYER%d", mPetalCaseName.Data(), 0), 0.5f, mAngularCoverageAzimuthalWall, 50.f, 1.e-3); + mPetalLayers.emplace_back(1, Form("%s_LAYER%d", mPetalCaseName.Data(), 1), 1.2f, mAngularCoverageAzimuthalWall, 50.f, 1.e-3); + mPetalLayers.emplace_back(2, Form("%s_LAYER%d", mPetalCaseName.Data(), 2), 2.5f, mAngularCoverageAzimuthalWall, 50.f, 1.e-3); + for (Int_t i = 0; i < mPetalLayers.size(); ++i) { + mPetalLayers[i].createLayer(motherVolume, mAzimuthalWallCombiTrans); + } + + // Add petal disks + // diskNumber, diskName, zPos, rIn, rOut, angularCoverage, diskx2X0 + 
mPetalDisks.emplace_back(0, Form("%s_DISK%d", mPetalCaseName.Data(), 0), 26., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(1, Form("%s_DISK%d", mPetalCaseName.Data(), 1), 30., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(2, Form("%s_DISK%d", mPetalCaseName.Data(), 2), 34., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(3, Form("%s_DISK%d", mPetalCaseName.Data(), 3), -26., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(4, Form("%s_DISK%d", mPetalCaseName.Data(), 4), -30., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(5, Form("%s_DISK%d", mPetalCaseName.Data(), 5), -34., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + for (Int_t i = 0; i < mPetalDisks.size(); ++i) { + mPetalDisks[i].createDisk(motherVolume, mAzimuthalWallCombiTrans); + } + + addDetectionPetelsToFullComposite(); +} + +void TRKPetalCase::addDetectionPetelsToFullComposite() +{ + for (Int_t i = 0; i < mPetalLayers.size(); ++i) { + Double_t zLength = mPetalLayers[i].getZLength(); + Double_t rIn = mPetalLayers[i].getInnerRadius(); + Double_t thickness = mPetalLayers[i].getChipThickness(); + Double_t angularCoverage = mPetalLayers[i].getAngularCoverage(); + TGeoTubeSeg* layerForExcavation = new TGeoTubeSeg(Form("PETALCASE%d_EXCAVATIONLAYER%d", mPetalCaseNumber, i), rIn, rIn + thickness, zLength / 2., -0.5 * angularCoverage * mToDeg, 0.5 * angularCoverage * mToDeg); + + TString layerForExcavationCompositeFormula = layerForExcavation->GetName(); + layerForExcavationCompositeFormula += ":"; + layerForExcavationCompositeFormula += mAzimuthalWallCombiTrans->GetName(); + addToPetalCaseComposite(layerForExcavationCompositeFormula); + } + + for (Int_t i = 0; i < mPetalDisks.size(); ++i) { + Double_t zPos = mPetalDisks[i].getZ(); + Double_t rIn = mPetalDisks[i].getInnerRadius(); + Double_t rOut = mPetalDisks[i].getOuterRadius(); + Double_t thickness = 
mPetalDisks[i].getThickness(); + Double_t angularCoverage = mPetalDisks[i].getAngularCoverage(); + TGeoTubeSeg* diskForExcavation = new TGeoTubeSeg(Form("PETALCASE%d_EXCAVATIONDISK%d", mPetalCaseNumber, i), rIn, rOut, thickness / 2., -0.5 * angularCoverage * mToDeg, 0.5 * angularCoverage * mToDeg); + TGeoCombiTrans* diskForExcavationCombiTrans = new TGeoCombiTrans(*(mAzimuthalWallCombiTrans->MakeClone())); // Copy from petal case + diskForExcavationCombiTrans->SetName((TString)Form("PETALCASE%d_EXCAVATIONDISK%d_COMBITRANS", mPetalCaseNumber, i)); + diskForExcavationCombiTrans->SetDz(zPos); // Overwrite z location + diskForExcavationCombiTrans->RegisterYourself(); + + TString diskForExcavationCompositeFormula = diskForExcavation->GetName(); + diskForExcavationCompositeFormula += ":"; + diskForExcavationCompositeFormula += diskForExcavationCombiTrans->GetName(); + addToPetalCaseComposite(diskForExcavationCompositeFormula); + } +} + +// ClassImp(TRKPetalCase); +} // namespace trk +} // namespace o2 \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalDisk.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalDisk.cxx new file mode 100644 index 0000000000000..e24b24b48c882 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalDisk.cxx @@ -0,0 +1,94 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file TRKPetalDisk.cxx +/// \brief Implementation of the TRKPetalDisk class + +#include "TRKSimulation/TRKPetalDisk.h" +#include "TRKBase/GeometryTGeo.h" + +#include // for LOG + +#include "TGeoManager.h" // for TGeoManager, gGeoManager +#include "TGeoMatrix.h" // for TGeoCombiTrans, TGeoRotation, etc +#include "TGeoTube.h" // for TGeoTube, TGeoTubeSeg +#include "TGeoVolume.h" // for TGeoVolume, TGeoVolumeAssembly +#include "TGeoCompositeShape.h" // for TGeoCompositeShape +#include "TMathBase.h" // for Abs +#include "TMath.h" // for Sin, RadToDeg, DegToRad, Cos, Tan, etc +#include "TGeoTube.h" + +#include // for snprintf + +namespace o2 +{ +namespace trk +{ + +TRKPetalDisk::TRKPetalDisk(Int_t diskNumber, std::string diskName, Float_t z, Float_t rIn, Float_t rOut, Float_t angularCoverage, Float_t Diskx2X0) +{ + // Creates a simple parametrized petal disk + mDiskNumber = diskNumber; + mDiskName = diskName; + mZ = z; + mAngularCoverage = angularCoverage; + mx2X0 = Diskx2X0; + mInnerRadius = rIn; + mOuterRadius = rOut; + Float_t Si_X0 = 9.5; + mChipThickness = Diskx2X0 * Si_X0; + + LOG(info) << "Creating TRK Disk " << mDiskNumber; + LOG(info) << " Using silicon X0 = " << Si_X0 << " to emulate disk radiation length."; + LOG(info) << " Disk z = " << mZ << " ; R_in = " << mInnerRadius << " ; R_out = " << mOuterRadius << " ; x2X0 = " << mx2X0 << " ; ChipThickness = " << mChipThickness; +} + +void TRKPetalDisk::createDisk(TGeoVolume* motherVolume, TGeoCombiTrans* combiTrans) +{ + // Create tube, set sensitive volume, add to mother volume + Double_t toDeg = 180 / TMath::Pi(); + std::string chipName = mDiskName + "_" + o2::trk::GeometryTGeo::getTRKChipPattern() + std::to_string(mDiskNumber), + sensName = mDiskName + "_" + Form("%s%d", GeometryTGeo::getTRKSensorPattern(), mDiskNumber); + + mSensorName = sensName; + + TGeoTubeSeg* sensor = new TGeoTubeSeg(mInnerRadius, mOuterRadius, mChipThickness / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + 
TGeoTubeSeg* chip = new TGeoTubeSeg(mInnerRadius, mOuterRadius, mChipThickness / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + TGeoTubeSeg* disk = new TGeoTubeSeg(mInnerRadius, mOuterRadius, mChipThickness / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + + TGeoMedium* medSi = gGeoManager->GetMedium("TRK_SILICON$"); + TGeoMedium* medAir = gGeoManager->GetMedium("TRK_AIR$"); + + TGeoVolume* sensVol = new TGeoVolume(sensName.c_str(), sensor, medSi); + sensVol->SetLineColor(kYellow); + TGeoVolume* chipVol = new TGeoVolume(chipName.c_str(), chip, medSi); + chipVol->SetLineColor(kYellow); + TGeoVolume* diskVol = new TGeoVolume(mDiskName.c_str(), disk, medAir); + diskVol->SetLineColor(kYellow); + + LOG(info) << "Inserting " << sensVol->GetName() << " inside " << chipVol->GetName(); + chipVol->AddNode(sensVol, 1, nullptr); + + LOG(info) << "Inserting " << chipVol->GetName() << " inside " << diskVol->GetName(); + diskVol->AddNode(chipVol, 1, nullptr); + + // Finally put everything in the mother volume + TGeoCombiTrans* fwdPetalCombiTrans = new TGeoCombiTrans(*(combiTrans->MakeClone())); // Copy from petal case + fwdPetalCombiTrans->SetDz(mZ); // Overwrite z location + fwdPetalCombiTrans->RegisterYourself(); + + LOG(info) << "Inserting " << diskVol->GetName() << " inside " << motherVolume->GetName(); + motherVolume->AddNode(diskVol, 1, fwdPetalCombiTrans); +} +// ClassImp(TRKPetalLayer); + +} // namespace trk +} // namespace o2 \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalLayer.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalLayer.cxx new file mode 100644 index 0000000000000..c8ff0d957bb19 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalLayer.cxx @@ -0,0 +1,79 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "TRKSimulation/TRKPetalLayer.h" +#include "TRKBase/GeometryTGeo.h" + +#include "Framework/Logger.h" + +#include "TGeoTube.h" +#include "TGeoBBox.h" +#include "TGeoVolume.h" +#include "TGeoTube.h" +#include "TGeoMatrix.h" + +#include "TMath.h" + +namespace o2 +{ +namespace trk +{ +TRKPetalLayer::TRKPetalLayer(Int_t layerNumber, std::string layerName, Float_t rIn, Float_t angularCoverage, Float_t zLength, Float_t layerX2X0) + : mLayerNumber(layerNumber), mLayerName(layerName), mInnerRadius(rIn), mAngularCoverage(angularCoverage), mZ(zLength), mX2X0(layerX2X0), mModuleWidth(4.54) +{ + Float_t Si_X0 = 9.5f; + mChipThickness = mX2X0 * Si_X0; + LOGP(info, "Creating layer: id: {} rInner: {} thickness: {} zLength: {} x2X0: {}", mLayerNumber, mInnerRadius, mChipThickness, mZ, mX2X0); +} + +void TRKPetalLayer::createLayer(TGeoVolume* motherVolume, TGeoCombiTrans* combiTrans) +{ + TGeoMedium* medSi = gGeoManager->GetMedium("TRK_SILICON$"); + TGeoMedium* medAir = gGeoManager->GetMedium("TRK_AIR$"); + + std::string staveName = mLayerName + "_" + o2::trk::GeometryTGeo::getTRKStavePattern() + std::to_string(mLayerNumber), + chipName = mLayerName + "_" + o2::trk::GeometryTGeo::getTRKChipPattern() + std::to_string(mLayerNumber), + sensName = mLayerName + "_" + Form("%s%d", GeometryTGeo::getTRKSensorPattern(), mLayerNumber); + + mSensorName = sensName; + + Double_t toDeg = 180 / TMath::Pi(); + mLayer = new TGeoTubeSeg(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + TGeoVolume* layerVol = new 
TGeoVolume(mLayerName.c_str(), mLayer, medAir); + layerVol->SetLineColor(kYellow); + + TGeoTubeSeg* stave = new TGeoTubeSeg(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + TGeoTubeSeg* chip = new TGeoTubeSeg(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + TGeoTubeSeg* sensor = new TGeoTubeSeg(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + + TGeoVolume* sensVol = new TGeoVolume(sensName.c_str(), sensor, medSi); + sensVol->SetLineColor(kYellow); + TGeoVolume* chipVol = new TGeoVolume(chipName.c_str(), chip, medSi); + chipVol->SetLineColor(kYellow); + TGeoVolume* staveVol = new TGeoVolume(staveName.c_str(), stave, medSi); + staveVol->SetLineColor(kYellow); + + LOGP(info, "Inserting {} in {} ", sensVol->GetName(), chipVol->GetName()); + chipVol->AddNode(sensVol, 1, nullptr); + + LOGP(info, "Inserting {} in {} ", chipVol->GetName(), staveVol->GetName()); + staveVol->AddNode(chipVol, 1, nullptr); + + LOGP(info, "Inserting {} in {} ", staveVol->GetName(), layerVol->GetName()); + layerVol->AddNode(staveVol, 1, nullptr); + + LOGP(info, "Inserting {} in {} ", layerVol->GetName(), motherVolume->GetName()); + motherVolume->AddNode(layerVol, 1, combiTrans); +} +// ClassImp(TRKPetalLayer); + +} // namespace trk +} // namespace o2 \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKServices.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKServices.cxx index 7937e3b4de09a..1fb966425f974 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKServices.cxx +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKServices.cxx @@ -27,16 +27,6 @@ namespace o2 { namespace trk { -TRKServices::TRKServices(float rMin, float zLength, float thickness) -{ - mColdPlateRMin = rMin; - mColdPlateZLength = zLength; - 
mColdPlateThickness = thickness; - mZLengthIRISVacV = 70.; - mThicknessIRISVacV = 150.e-4; - mRInIRISVacV = 0.48; - mROutIRISVacV = mColdPlateRMin + mColdPlateThickness; -} void TRKServices::createMaterials() { @@ -63,6 +53,7 @@ void TRKServices::createMaterials() float zAir[4] = {6., 7., 8., 18.}; float wAir[4] = {0.000124, 0.755267, 0.231781, 0.012827}; float dAir = 1.20479E-3; + float dAir1 = 1.20479E-11; // vacuum denisity inside pipe // Water float aWater[2] = {1.00794, 15.9994}; @@ -115,6 +106,7 @@ void TRKServices::createMaterials() matmgr.Mixture("ALICE3_TRKSERVICES", 74, "ALUMINIUM5083", aAl5083, zAl5083, dAl5083, 9, wAl5083); // AL5083 - Candidate for IRIS vacuum vessel matmgr.Mixture("ALICE3_TRKSERVICES", 75, "ALUMINIUMBERYLLIUMMETAL", aAlBeMet, zAlBeMet, dAlBeMet, 2, wAlBeMet); // Aluminium-Beryllium metal - Candidate for IRIS vacuum vessel matmgr.Material("ALICE3_TRKSERVICES", 76, "CARBONFIBERM55J6K", 12.0107, 6, 1.92, 999, 999); // Carbon Fiber M55J + matmgr.Mixture("ALICE3_PIPE", 77, "VACUUM", aAir, zAir, dAir1, 4, wAir); matmgr.Medium("ALICE3_TRKSERVICES", 1, "CERAMIC", 66, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Ceramic for cold plate matmgr.Medium("ALICE3_TRKSERVICES", 2, "COPPER", 67, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Copper for cables @@ -127,56 +119,62 @@ void TRKServices::createMaterials() matmgr.Medium("ALICE3_TRKSERVICES", 9, "ALUMINIUM5083", 74, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Al5083 for IRIS vacuum vessel matmgr.Medium("ALICE3_TRKSERVICES", 10, "ALUMINIUMBERYLLIUMMETAL", 75, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // AlBeMet for IRIS vacuum vessel matmgr.Medium("ALICE3_TRKSERVICES", 11, "CARBONFIBERM55J6K", 76, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Carbon Fiber M55J + matmgr.Medium("ALICE3_PIPE", 12, "VACUUM", 77, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Vacuum inside the beam pipe } void 
TRKServices::createServices(TGeoVolume* motherVolume) { createMaterials(); - createColdplate(motherVolume); + createVacuumCompositeShape(); createMiddleServices(motherVolume); createOuterDisksServices(motherVolume); createOuterBarrelServices(motherVolume); } -void TRKServices::createColdplate(TGeoVolume* motherVolume) +void TRKServices::createVacuumCompositeShape() +{ + Double_t pipeRIn = 1.8f; + Double_t A3IPLength = 1000.f; + Double_t vacuumVesselRIn = 5.6f; + Double_t vacuumVesselThickness = 0.08f; + Double_t vacuumVesselLength = 76.f; + + // Vacuum for A and C Side + Double_t vacuumASideLength = A3IPLength / 2. - vacuumVesselThickness - vacuumVesselLength / 2.; + Double_t vacuumCSideLength = A3IPLength / 2. + vacuumVesselLength / 2.; + + // Vacuum tubes + TGeoTube* vacuumASide = new TGeoTube("VACUUM_Ash", 0., pipeRIn, vacuumASideLength / 2.); + TGeoTube* vacuumCSide = new TGeoTube("VACUUM_Csh", 0., vacuumVesselRIn, vacuumCSideLength / 2.); + + // Vacuum positions + TGeoTranslation* posVacuumASide = new TGeoTranslation("VACUUM_ASIDE_POSITION", 0, 0, vacuumVesselLength / 2. + vacuumVesselThickness + vacuumASideLength / 2.); + posVacuumASide->RegisterYourself(); + TGeoTranslation* posVacuumCSide = new TGeoTranslation("VACUUM_CSIDE_POSITION", 0, 0, vacuumVesselLength / 2. 
- vacuumCSideLength / 2.); + posVacuumCSide->RegisterYourself(); + + mVacuumCompositeFormula = + "VACUUM_Ash:VACUUM_ASIDE_POSITION" + "+VACUUM_Csh:VACUUM_CSIDE_POSITION"; +} + +void TRKServices::excavateFromVacuum(TString shapeToExcavate) +{ + mVacuumCompositeFormula += "-"; + mVacuumCompositeFormula += shapeToExcavate; +} + +void TRKServices::registerVacuum(TGeoVolume* motherVolume) { auto& matmgr = o2::base::MaterialManager::Instance(); - const TGeoMedium* medCeramic = matmgr.getTGeoMedium("ALICE3_TRKSERVICES_CERAMIC"); - - TGeoTube* coldPlate = new TGeoTube("TRK_COLDPLATEsh", mColdPlateRMin, mColdPlateRMin + mColdPlateThickness, mColdPlateZLength / 2.); - TGeoVolume* coldPlateVolume = new TGeoVolume("TRK_COLDPLATE", coldPlate, medCeramic); - coldPlateVolume->SetVisibility(1); - coldPlateVolume->SetLineColor(kGray); - - LOGP(info, "Creating cold plate service"); - - LOGP(info, "Inserting {} in {} ", coldPlateVolume->GetName(), motherVolume->GetName()); - motherVolume->AddNode(coldPlateVolume, 1, nullptr); - - // IRIS Tracker Vacuum Vessel - TGeoTube* irisVacuumVesselInnerTube = new TGeoTube("TRK_IRISVACUUMVESSEL_INNERTUBEsh", mRInIRISVacV, mRInIRISVacV + mThicknessIRISVacV, mZLengthIRISVacV / 2.); - TGeoTube* irisVacuumVesselOuterTube = new TGeoTube("TRK_IRISVACUUMVESSEL_OUTERTUBEsh", mROutIRISVacV, mROutIRISVacV + mThicknessIRISVacV, mZLengthIRISVacV / 2.); - TGeoTube* irisVacuumVesselWall = new TGeoTube("TRK_IRISVACUUMVESSEL_WALLsh", mRInIRISVacV, mROutIRISVacV + mThicknessIRISVacV, mThicknessIRISVacV / 2.); - TGeoTranslation* irisVacVWallNegZ = new TGeoTranslation("IRISVACVWALLNEGZ", 0., 0., -mZLengthIRISVacV / 2. - mThicknessIRISVacV / 2.); - irisVacVWallNegZ->RegisterYourself(); - TGeoTranslation* irisVacVWallPosZ = new TGeoTranslation("IRISVACVWALLPOSZ", 0., 0., mZLengthIRISVacV / 2. 
+ mThicknessIRISVacV / 2.); - irisVacVWallPosZ->RegisterYourself(); - TString irisCompositeFormula = - "TRK_IRISVACUUMVESSEL_INNERTUBEsh" - "+TRK_IRISVACUUMVESSEL_OUTERTUBEsh" - "+TRK_IRISVACUUMVESSEL_WALLsh:IRISVACVWALLNEGZ" - "+TRK_IRISVACUUMVESSEL_WALLsh:IRISVACVWALLPOSZ"; - TGeoCompositeShape* irisVacuumVesselComposite = new TGeoCompositeShape("TRK_IRISVACUUMVESSELsh", irisCompositeFormula); - - const TGeoMedium* medBe = matmgr.getTGeoMedium("ALICE3_TRKSERVICES_BERYLLIUM"); - TGeoVolume* irisVacuumVesselVolume = new TGeoVolume("TRK_IRISVACUUMVESSEL", irisVacuumVesselComposite, medBe); - - irisVacuumVesselVolume->SetVisibility(1); - irisVacuumVesselVolume->SetLineColor(kGray); - - LOGP(info, "Creating IRIS Tracker vacuum vessel"); - LOGP(info, "Inserting {} in {} ", irisVacuumVesselVolume->GetName(), motherVolume->GetName()); - motherVolume->AddNode(irisVacuumVesselVolume, 1, nullptr); + const TGeoMedium* kMedVac = matmgr.getTGeoMedium("ALICE3_PIPE_VACUUM"); + + TGeoCompositeShape* vacuumComposite = new TGeoCompositeShape("A3IP_VACUUMsh", mVacuumCompositeFormula); + TGeoVolume* vacuumVolume = new TGeoVolume("A3IP_VACUUM", vacuumComposite, kMedVac); + + // Add the vacuum to the barrel + vacuumVolume->SetLineColor(kGreen - 3); + motherVolume->AddNode(vacuumVolume, 1, new TGeoTranslation(0, 0, 0)); } void TRKServices::createOuterDisksServices(TGeoVolume* motherVolume) @@ -459,7 +457,7 @@ void TRKServices::createOuterBarrelServices(TGeoVolume* motherVolume) // Fiber 0.269 cm const float siO2FiberThick = 0.5 * 0.269; const float peFiberThick = 0.5 * 0.269; - float rMinOuterBarrelServices = ((TGeoTube*)motherVolume->GetNode(Form("%s10_1", GeometryTGeo::getTRKLayerPattern()))->GetVolume()->GetShape())->GetRmax(); + float rMinOuterBarrelServices = ((TGeoTube*)motherVolume->GetNode(Form("%s7_1", GeometryTGeo::getTRKLayerPattern()))->GetVolume()->GetShape())->GetRmax(); const float zLengthOuterBarrelServices = 350.f; // 175cm TGeoTube* outerBarrelFiberSIO2 = new 
TGeoTube("TRK_OUTERBARREL_FIBER_SIO2sh", rMinOuterBarrelServices, rMinOuterBarrelServices + siO2FiberThick, zLengthOuterBarrelServices); diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h index 1b0181144b5d4..b82d8879e7dad 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h @@ -15,7 +15,10 @@ #pragma link off all classes; #pragma link off all functions; +#pragma link C++ class o2::trk::TRKPetalCase + ; #pragma link C++ class o2::trk::TRKLayer + ; +#pragma link C++ class o2::trk::TRKPetalLayer + ; +#pragma link C++ class o2::trk::TRKPetalDisk + ; #pragma link C++ class o2::trk::TRKServices + ; #pragma link C++ class o2::trk::Detector + ; #pragma link C++ class o2::base::DetImpl < o2::trk::Detector> + ; diff --git a/macro/build_geometry.C b/macro/build_geometry.C index 12d3842239874..93b06eb4bfeae 100644 --- a/macro/build_geometry.C +++ b/macro/build_geometry.C @@ -167,7 +167,7 @@ void build_geometry(FairRunSim* run = nullptr) #ifdef ENABLE_UPGRADES // upgraded beampipe at the interaction point (IP) if (isActivated("A3IP")) { - run->AddModule(new o2::passive::Alice3Pipe("A3IP", "Alice 3 beam pipe", !isActivated("TRK"), !isActivated("FT3"), 1.8f, 0.08f, 1000.f, 5.6f, 0.08f, 76.f)); + run->AddModule(new o2::passive::Alice3Pipe("A3IP", "Alice 3 beam pipe", 1.8f, 0.08f, 1000.f, 5.6f, 0.08f, 76.f)); } // the absorber From ced3e8ada32e6a6393745a811f46fe37cd441448 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Thu, 13 Mar 2025 09:15:11 +0100 Subject: [PATCH 0046/1764] GPU: Remove thrust deprecated declarations in ITS code --- Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu 
index 4fa7913c10e82..ce93523319e99 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -201,7 +201,7 @@ struct equal_tracklets { }; template -struct pair_to_first : public thrust::unary_function, T1> { +struct pair_to_first { GPUhd() int operator()(const gpuPair& a) const { return a.first; @@ -209,7 +209,7 @@ struct pair_to_first : public thrust::unary_function, T1> { }; template -struct pair_to_second : public thrust::unary_function, T2> { +struct pair_to_second { GPUhd() int operator()(const gpuPair& a) const { return a.second; @@ -710,7 +710,7 @@ GPUg() void printPointersKernel(std::tuple args) } template -struct trackletSortEmptyFunctor : public thrust::binary_function { +struct trackletSortEmptyFunctor { GPUhd() bool operator()(const T& lhs, const T& rhs) const { return lhs.firstClusterIndex > rhs.firstClusterIndex; @@ -718,7 +718,7 @@ struct trackletSortEmptyFunctor : public thrust::binary_function { }; template -struct trackletSortIndexFunctor : public thrust::binary_function { +struct trackletSortIndexFunctor { GPUhd() bool operator()(const T& lhs, const T& rhs) const { return lhs.firstClusterIndex < rhs.firstClusterIndex || (lhs.firstClusterIndex == rhs.firstClusterIndex && lhs.secondClusterIndex < rhs.secondClusterIndex); From 5ec817dba37076f67a1fe7cbc28c189641a30e1f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 13:05:34 +0100 Subject: [PATCH 0047/1764] GPU: Remove obsolete option to run TrackletSelector not in pipeline --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 6 -- .../Base/GPUReconstructionIncludes.h | 3 - .../Definitions/GPUDefGPUParameters.h | 8 --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 - .../Global/GPUChainTrackingSectorTracker.cxx | 57 +------------------ .../SectorTracker/GPUTPCTracker.cxx | 2 +- GPU/GPUTracking/kernels.cmake | 2 +- 7 files changed, 3 insertions(+), 76 deletions(-) diff --git 
a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index a7e0c2cb827f1..4b767a6b8a8f7 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -273,9 +273,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.trackletConstructorInPipeline < 0) { mProcessingSettings.trackletConstructorInPipeline = 1; } - if (mProcessingSettings.trackletSelectorInPipeline < 0) { - mProcessingSettings.trackletSelectorInPipeline = 1; - } if (mProcessingSettings.trackletSelectorSectors < 0) { mProcessingSettings.trackletSelectorSectors = 1; } @@ -296,9 +293,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() UpdateAutomaticProcessingSettings(); GPUCA_GPUReconstructionUpdateDefaults(); - if (!mProcessingSettings.trackletConstructorInPipeline) { - mProcessingSettings.trackletSelectorInPipeline = false; - } if (!mProcessingSettings.rtc.enable) { mProcessingSettings.rtc.optConstexpr = false; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index b35613f3bec59..f18ab21dc3972 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -33,9 +33,6 @@ if (mProcessingSettings.trackletConstructorInPipeline < 0) { \ mProcessingSettings.trackletConstructorInPipeline = GPUCA_CONSTRUCTOR_IN_PIPELINE; \ } \ - if (mProcessingSettings.trackletSelectorInPipeline < 0) { \ - mProcessingSettings.trackletSelectorInPipeline = GPUCA_SELECTOR_IN_PIPELINE; \ - } \ if (mProcessingSettings.trackletSelectorSectors < 0) { \ mProcessingSettings.trackletSelectorSectors = GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT; \ } \ diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index c40659ecd2632..772b4684b590e 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ 
b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -85,7 +85,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 @@ -151,7 +150,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 @@ -217,7 +215,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 @@ -275,7 +272,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 @@ -537,9 +533,6 @@ #ifndef GPUCA_CONSTRUCTOR_IN_PIPELINE #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #endif - #ifndef GPUCA_SELECTOR_IN_PIPELINE - #define GPUCA_SELECTOR_IN_PIPELINE 0 - #endif #ifndef GPUCA_ALTERNATE_BORDER_SORT #define GPUCA_ALTERNATE_BORDER_SORT 0 #endif @@ -562,7 +555,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 0 #define GPUCA_SORT_BEFORE_FIT 0 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 
3c31a4fbb8409..5663aed8033b7 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -259,7 +259,6 @@ AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") AddOption(trackletSelectorSectors, int8_t, -1, "", 0, "Number of sectors to processes in parallel at max") AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline") -AddOption(trackletSelectorInPipeline, int8_t, -1, "", 0, "Run tracklet selector in the pipeline") AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit") AddOption(mergerSortTracks, int8_t, -1, "", 0, "Sort track indizes for GPU track fit") AddOption(alternateBorderSort, int8_t, -1, "", 0, "Alternative implementation for sorting of border tracks") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index dd7fe285265ad..4b9b8c33a0887 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -226,7 +226,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } } - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { + if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { runKernel({GetGridAuto(useStream), {iSector}}); runKernel({{1, -ThreadCount(), useStream}, {iSector}}, 1); if (GetProcessingSettings().deterministicGPUReconstruction) { @@ -250,56 +250,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() ReleaseEvent(mEvents->init); } - if (!GetProcessingSettings().trackletSelectorInPipeline) { - if (GetProcessingSettings().trackletConstructorInPipeline) { - 
SynchronizeGPU(); - } else { - for (int32_t i = 0; i < mRec->NStreams(); i++) { - RecordMarker(&mEvents->stream[i], i); - } - runKernel({GetGridAuto(0), krnlRunRangeNone, {&mEvents->single, mEvents->stream, mRec->NStreams()}}); - for (int32_t i = 0; i < mRec->NStreams(); i++) { - ReleaseEvent(mEvents->stream[i]); - } - SynchronizeEventAndRelease(mEvents->single); - } - - if (GetProcessingSettings().debugLevel >= 4) { - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - DoDebugAndDump(RecoStep::TPCSectorTracking, 128, processors()->tpcTrackers[iSector], &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - } - } - - int32_t runSectors = 0; - int32_t useStream = 0; - for (uint32_t iSector = 0; iSector < NSECTORS; iSector += runSectors) { - if (runSectors < GetProcessingSettings().trackletSelectorSectors) { - runSectors++; - } - runSectors = CAMath::Min(runSectors, NSECTORS - iSector); - if (getKernelProperties().minBlocks * BlockCount() < (uint32_t)runSectors) { - runSectors = getKernelProperties().minBlocks * BlockCount(); - } - - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Running TPC Tracklet selector (Stream %d, Sector %d to %d)", useStream, iSector, iSector + runSectors); - } - runKernel({GetGridAuto(useStream), {iSector, runSectors}}); - runKernel({{1, -ThreadCount(), useStream}, {iSector}}, runSectors); - for (uint32_t k = iSector; k < iSector + runSectors; k++) { - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {k}}); - } - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[k].MemoryResCommon(), useStream, &mEvents->sector[k]); - streamMap[k] = useStream; - } - useStream++; - if (useStream >= mRec->NStreams()) { - useStream = 0; - } - } - } - mSectorSelectorReady = 0; std::array transferRunning; @@ -335,11 +285,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() processors()->tpcTrackers[iSector].DumpHitWeights(*mDebugFile); } } - if 
(!GetProcessingSettings().trackletSelectorInPipeline) { - if (GetProcessingSettings().debugMask & 512) { - processors()->tpcTrackers[iSector].DumpTrackHits(*mDebugFile); - } - } } if (transferRunning[iSector]) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 63c64f78cc095..28521b2987a45 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -93,7 +93,7 @@ void* GPUTPCTracker::SetPointersCommon(void* mem) void GPUTPCTracker::RegisterMemoryAllocation() { AllocateAndInitializeLate(); - bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletSelectorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); + bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletConstructorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSectorLinks", reLinks); mMemoryResSectorScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSectorScratch"); diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index c973264bfde2a..a823fc853e5b1 100644 --- a/GPU/GPUTracking/kernels.cmake +++ 
b/GPU/GPUTracking/kernels.cmake @@ -33,7 +33,7 @@ o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRAC o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB both) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO single int32_t n) From 1b8388da5e43bbc506e62f0ad1f1d1b9449e87c6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 14:34:06 +0100 Subject: [PATCH 0048/1764] GPU: Remove obsolete preprocessor magic to create 2 different kernels for single-slice and multi-slice --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 37 ++-- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 6 +- .../Base/GPUReconstructionKernelMacros.h | 82 ++------ .../Base/GPUReconstructionKernels.h | 11 +- .../Base/cuda/GPUReconstructionCUDA.cu | 22 +- .../Base/cuda/GPUReconstructionCUDA.h | 2 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 47 ++--- .../GPUReconstructionCUDAkernel.template.cu | 4 +- .../GPUReconstructionHIPkernel.template.hip | 4 +- .../Base/opencl/GPUReconstructionOCL.cl | 6 +- .../Base/opencl/GPUReconstructionOCL.h | 6 +- .../opencl/GPUReconstructionOCLKernels.cxx | 37 +--- GPU/GPUTracking/Global/GPUChain.h | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 41 ++-- GPU/GPUTracking/kernels.cmake | 194 +++++++++--------- 15 files changed, 201 insertions(+), 300 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx 
b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 969dd06d6297e..f5d350b4064d0 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -66,28 +66,25 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu if (x.nThreads != 1) { throw std::runtime_error("Cannot run device kernel on host with nThreads != 1"); } - uint32_t num = y.num == 0 || y.num == -1 ? 1 : y.num; - for (uint32_t k = 0; k < num; k++) { - int32_t nThreads = getNKernelHostThreads(false); - if (nThreads > 1) { - if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d Threads\n", nThreads); - } - tbb::this_task_arena::isolate([&] { - mThreading->activeThreads->execute([&] { - tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { - typename T::GPUSharedMemory smem; - for (uint32_t iB = r.begin(); iB < r.end(); iB++) { - T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); - } - }); + int32_t nThreads = getNKernelHostThreads(false); + if (nThreads > 1) { + if (mProcessingSettings.debugLevel >= 5) { + printf("Running %d Threads\n", nThreads); + } + tbb::this_task_arena::isolate([&] { + mThreading->activeThreads->execute([&] { + tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { + typename T::GPUSharedMemory smem; + for (uint32_t iB = r.begin(); iB < r.end(); iB++) { + T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.index], args...); + } }); }); - } else { - for (uint32_t iB = 0; iB < x.nBlocks; iB++) { - typename T::GPUSharedMemory smem; - T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); - } + }); + } else { + for (uint32_t iB = 0; iB < x.nBlocks; iB++) { + typename T::GPUSharedMemory smem; + T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.index], args...); } } } diff 
--git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 7901c34866c66..f90820281c74d 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -49,7 +49,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels @@ -77,7 +77,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ { \ if (cpuFallback) { \ @@ -161,7 +161,7 @@ inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) throw std::runtime_error("GPUCA_MAX_THREADS exceeded"); } if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("Running kernel %s (Stream %d, Range %d/%d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.start, setup.y.num, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str()); + GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str()); } if (nThreads == 0 || nBlocks == 0) { return; diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index 41abc8725c07b..f80b324970dc9 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -51,97 +51,45 @@ #define GPUCA_ATTRRES3(XX) // 3 attributes not supported #define GPUCA_ATTRRES2(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES2_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) #define GPUCA_ATTRRES(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) -// GPU Kernel entry point for single sector -#define GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, ...) 
\ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t iSector_internal GPUCA_M_STRIP(x_arguments)) -#ifdef GPUCA_KRNL_DEFONLY -#define GPUCA_KRNLGPU_SINGLE(...) GPUCA_KRNLGPU_SINGLE_DEF(__VA_ARGS__); -#else -#define GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ - { \ - GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ - GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSector_internal] GPUCA_M_STRIP(x_forward)); \ - } -#endif -// GPU Kernel entry point for multiple sector -#define GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, ...) \ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)(GPUCA_CONSMEM_PTR int32_t firstSector, int32_t nSectorCount GPUCA_M_STRIP(x_arguments)) +// GPU Kernel entry point +#define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) \ + GPUg() void GPUCA_ATTRRES(,GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) + #ifdef GPUCA_KRNL_DEFONLY -#define GPUCA_KRNLGPU_MULTI(...) GPUCA_KRNLGPU_MULTI_DEF(__VA_ARGS__); +#define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__); #else -#define GPUCA_KRNLGPU_MULTI(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ +#define GPUCA_KRNLGPU(x_class, x_attributes, x_arguments, x_forward, ...) 
\ + GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ { \ - const int32_t iSector_internal = nSectorCount * (get_group_id(0) + (get_num_groups(0) % nSectorCount != 0 && nSectorCount * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0); \ - const int32_t nSectorBlockOffset = get_num_groups(0) * iSector_internal / nSectorCount; \ - const int32_t sectorBlockId = get_group_id(0) - nSectorBlockOffset; \ - const int32_t sectorGridDim = get_num_groups(0) * (iSector_internal + 1) / nSectorCount - get_num_groups(0) * (iSector_internal) / nSectorCount; \ GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ - GPUCA_M_STRIP_FIRST(x_class)::template Thread(sectorGridDim, get_local_size(0), sectorBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSector + iSector_internal] GPUCA_M_STRIP(x_forward)); \ + GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[_iSector_internal] GPUCA_M_STRIP(x_forward)); \ } #endif -// GPU Host wrapper pre- and post-parts -#define GPUCA_KRNL_PRE(x_class, ...) \ +// GPU Host wrappers for kernel +#define GPUCA_KRNL_HOST(x_class, ...) \ + GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal { \ public: \ template \ static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ { \ auto& x = _xyz.x; \ - auto& y = _xyz.y; - -#define GPUCA_KRNL_POST() \ + auto& y = _xyz.y; \ + GPUCA_KRNL_CALL(x_class, __VA_ARGS__) \ } \ }; -// GPU Host wrappers for single kernel, multi-sector, or auto-detection -#define GPUCA_KRNL_single(...) 
\ - GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) \ - GPUCA_KRNL_PRE(__VA_ARGS__) \ - if (y.num > 1) { \ - throw std::runtime_error("Kernel called with invalid number of sectors"); \ - } else { \ - GPUCA_KRNL_CALL_single(__VA_ARGS__) \ - } \ - GPUCA_KRNL_POST() - -#define GPUCA_KRNL_multi(...) \ - GPUCA_KRNLGPU_MULTI(__VA_ARGS__) \ - GPUCA_KRNL_PRE(__VA_ARGS__) \ - GPUCA_KRNL_CALL_multi(__VA_ARGS__) \ - GPUCA_KRNL_POST() - -#define GPUCA_KRNL_(...) GPUCA_KRNL_single(__VA_ARGS__) -#define GPUCA_KRNL_simple(...) GPUCA_KRNL_single(__VA_ARGS__) -#define GPUCA_KRNL_both(...) \ - GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) \ - GPUCA_KRNLGPU_MULTI(__VA_ARGS__) \ - GPUCA_KRNL_PRE(__VA_ARGS__) \ - if (y.num <= 1) { \ - GPUCA_KRNL_CALL_single(__VA_ARGS__) \ - } else { \ - GPUCA_KRNL_CALL_multi(__VA_ARGS__) \ - } \ - GPUCA_KRNL_POST() - -#define GPUCA_KRNL_LOAD_(...) GPUCA_KRNL_LOAD_single(__VA_ARGS__) -#define GPUCA_KRNL_LOAD_simple(...) GPUCA_KRNL_LOAD_single(__VA_ARGS__) -#define GPUCA_KRNL_LOAD_both(...) \ - GPUCA_KRNL_LOAD_single(__VA_ARGS__) \ - GPUCA_KRNL_LOAD_multi(__VA_ARGS__) - #define GPUCA_KRNL_PROP(x_class, x_attributes) \ template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend() { \ - gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes)))}; \ + gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_STRIP(x_attributes))}; \ return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ } -// Generate GPU kernel and host wrapper -#define GPUCA_KRNL_WRAP(x_func, x_class, x_attributes, ...) GPUCA_M_CAT(x_func, GPUCA_M_STRIP_FIRST(x_attributes))(x_class, x_attributes, __VA_ARGS__) #endif // GPUCA_GPUCODE -#define GPUCA_KRNL_LB(x_class, x_attributes, ...) 
GPUCA_KRNL(x_class, (GPUCA_M_STRIP(x_attributes), REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class)))), __VA_ARGS__) +#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__) #endif // O2_GPU_GPURECONSTRUCTIONKERNELMACROS_H // clang-format on diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index d541e36a06af9..ba30f38e902ad 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -41,11 +41,8 @@ struct krnlExec { }; struct krnlRunRange { constexpr krnlRunRange() = default; - constexpr krnlRunRange(uint32_t a) : start(a), num(0) {} - constexpr krnlRunRange(uint32_t s, int32_t n) : start(s), num(n) {} - - uint32_t start = 0; - int32_t num = 0; + constexpr krnlRunRange(uint32_t v) : index(v) {} + uint32_t index = 0; }; struct krnlEvent { constexpr krnlEvent(deviceEvent* e = nullptr, deviceEvent* el = nullptr, int32_t n = 1) : ev(e), evList(el), nEvents(n) {} @@ -63,7 +60,7 @@ struct krnlProperties { }; struct krnlSetup { - krnlSetup(const krnlExec& xx, const krnlRunRange& yy = {0, -1}, const krnlEvent& zz = {nullptr, nullptr, 0}) : x(xx), y(yy), z(zz) {} + krnlSetup(const krnlExec& xx, const krnlRunRange& yy = {0}, const krnlEvent& zz = {nullptr, nullptr, 0}) : x(xx), y(yy), z(zz) {} krnlExec x; krnlRunRange y; krnlEvent z; @@ -98,7 +95,7 @@ class GPUReconstructionKernels : public T template using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; -#define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ virtual void runKernelImpl(const krnlSetupArgs& args) \ { \ T::template runKernelBackend(args); \ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 
7fb3744551953..d2adc3cc1fd19 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -380,7 +380,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUFailedMsg(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL - loadKernelModules(true, false); + loadKernelModules(true); } #endif void* devPtrConstantMem = nullptr; @@ -630,13 +630,10 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() } } -void GPUReconstructionCUDA::loadKernelModules(bool perKernel, bool perSingleMulti) +void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; -#define GPUCA_KRNL(...) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) \ - j += !perSingleMulti; -#define GPUCA_KRNL_LOAD_single(x_class, ...) \ +#define GPUCA_KRNL(x_class, ...) \ getRTCkernelNum(mInternals->kernelFunctions.size()); \ mInternals->kernelFunctions.emplace_back(new CUfunction); \ mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ @@ -644,20 +641,9 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel, bool perSingleMult GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ } \ GPUFailedMsg(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ - j += perSingleMulti; -#define GPUCA_KRNL_LOAD_multi(x_class, ...) 
\ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ - mInternals->kernelFunctions.emplace_back(new CUfunction); \ - mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi))); \ - if (mProcessingSettings.debugLevel >= 3) { \ - GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)), j); \ - } \ - GPUFailedMsg(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)))); \ - j += perSingleMulti; + j++; #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi if (j != mInternals->kernelModules.size()) { GPUFatal("Did not load all kernels (%u < %u)", j, (uint32_t)mInternals->kernelModules.size()); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index ee2f069028d74..dde70b9076e08 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -98,7 +98,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels 1)], args...); - if (y.num <= 1) { - GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); - } else { - pArgs[arg_offset + 1] = &y.num; - GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); - } + pArgs[arg_offset] = &y.index; + GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); + GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -88,33 +83,31 @@ void 
GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); -#else +#else // ---------- COMPILE_MODE = onefile | rdc ---------- #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 -#define GPUCA_KRNL_DEFONLY +#define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc #endif -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - GPUCA_KRNL_PROP(x_class, x_attributes) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_, x_class, x_attributes, x_arguments, x_forward, x_types) \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ + GPUCA_KRNL_PROP(x_class, x_attributes) \ + GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types) \ template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); + #ifndef __HIPCC__ // CUDA version -#define GPUCA_KRNL_CALL_single(x_class, ...) \ - GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.start, args...); -#define GPUCA_KRNL_CALL_multi(x_class, ...) \ - GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.start, y.num, args...); +#define GPUCA_KRNL_CALL(x_class, ...) \ + GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...); #else // HIP version #undef GPUCA_KRNL_CUSTOM #define GPUCA_KRNL_CUSTOM(args) GPUCA_M_STRIP(args) -#define GPUCA_KRNL_CALL_single(x_class, ...) \ - hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.start, args...); -#define GPUCA_KRNL_CALL_multi(x_class, ...) 
\ - hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.start, y.num, args...); +#define GPUCA_KRNL_CALL(x_class, ...) \ + hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...); #endif // __HIPCC__ -#endif + +#endif // ---------- COMPILE_MODE = onefile | rdc ---------- #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -137,13 +130,9 @@ int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k) void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& kernels) { -#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU_SINGLE(__VA_ARGS__))); -#define GPUCA_KRNL_LOAD_multi(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU_MULTI(__VA_ARGS__))); +#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__))); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi } #ifndef GPUCA_NO_CONSTANT_MEMORY diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu index bcf61eb07383f..3140c6b9158ad 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu @@ -15,9 +15,7 @@ #define GPUCA_GPUCODE_COMPILEKERNELS #include "GPUReconstructionCUDAIncludesHost.h" #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) -#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__); -#define GPUCA_KRNL_LOAD_multi(...) 
GPUCA_KRNLGPU_MULTI(__VA_ARGS__); +#define GPUCA_KRNL(...) GPUCA_KRNLGPU(__VA_ARGS__); #include "GPUReconstructionKernelMacros.h" // clang-format off diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip index ddbc9285763a9..427938a3bd704 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip @@ -15,9 +15,7 @@ #define GPUCA_GPUCODE_COMPILEKERNELS #include "GPUReconstructionHIPIncludesHost.h" #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) -#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__); -#define GPUCA_KRNL_LOAD_multi(...) GPUCA_KRNLGPU_MULTI(__VA_ARGS__); +#define GPUCA_KRNL(...) GPUCA_KRNLGPU(__VA_ARGS__); #include "GPUReconstructionKernelMacros.h" // clang-format off diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 4a3cda6c2cddc..10a425e4c76e8 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -75,14 +75,10 @@ typedef signed char int8_t; // if (gpu_mem != pTracker.GPUParametersConst()->gpumem) return; //TODO! -#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) -#define GPUCA_KRNL_LOAD_multi(...) GPUCA_KRNLGPU_MULTI(__VA_ARGS__) +#define GPUCA_KRNL(...) 
GPUCA_KRNLGPU(__VA_ARGS__) #define GPUCA_CONSMEM_PTR GPUglobal() char *gpu_mem, GPUconstant() GPUConstantMem* pConstant, #define GPUCA_CONSMEM (*pConstant) #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi // clang-format on diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 15015cdcb43c5..5132baa444cd9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -56,9 +56,9 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase void RecordMarker(deviceEvent* ev, int32_t stream) override; template - int32_t AddKernel(bool multi = false); + int32_t AddKernel(); template - uint32_t FindKernel(int32_t num); + uint32_t FindKernel(); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); template @@ -69,7 +69,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase template void runKernelBackend(const krnlSetupArgs& args); - template + template S& getKernelObject(); int32_t GetOCLPrograms(); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 8a1c8a6525c0d..8a6c889773cb0 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -24,15 +24,11 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) { - cl_kernel k = _xyz.y.num > 1 ? 
getKernelObject() : getKernelObject(); + cl_kernel k = getKernelObject(); auto& x = _xyz.x; auto& y = _xyz.y; auto& z = _xyz.z; - if (y.num <= 1) { - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, args...)); - } else { - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, y.num, args...)); - } + GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); cl_event ev; cl_event* evr; @@ -63,12 +59,9 @@ void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs -inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) +inline uint32_t GPUReconstructionOCLBackend::FindKernel() { std::string name(GetKernelName()); - if (num > 1) { - name += "_multi"; - } for (uint32_t k = 0; k < mInternals->kernels.size(); k++) { if (mInternals->kernels[k].second == name) { @@ -80,12 +73,9 @@ inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) } template -int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) +int32_t GPUReconstructionOCLBackend::AddKernel() { std::string name(GetKernelName()); - if (multi) { - name += "_multi"; - } std::string kname("krnl_" + name); cl_int ocl_error; @@ -98,30 +88,21 @@ int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) return 0; } -template +template S& GPUReconstructionOCLBackend::getKernelObject() { - static uint32_t krnl = FindKernel(MULTI ? 2 : 1); + static uint32_t krnl = FindKernel(); return mInternals->kernels[krnl].first; } int32_t GPUReconstructionOCLBackend::AddKernels() { -#define GPUCA_KRNL(...) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(x_class, ...) \ - if (AddKernel(false)) { \ - return 1; \ - } -#define GPUCA_KRNL_LOAD_multi(x_class, ...) \ - if (AddKernel(true)) { \ - return 1; \ +#define GPUCA_KRNL(x_class, ...) 
\ + if (AddKernel()) { \ + return 1; \ } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi - return 0; } diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index e017d9b60a269..b9da1c9a330d3 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -34,7 +34,7 @@ class GPUChain using krnlExec = gpu_reconstruction_kernels::krnlExec; using krnlEvent = gpu_reconstruction_kernels::krnlEvent; using deviceEvent = gpu_reconstruction_kernels::deviceEvent; - static constexpr krnlRunRange krnlRunRangeNone{0, -1}; + static constexpr krnlRunRange krnlRunRangeNone{0}; static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0}; virtual ~GPUChain() = default; diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index b05fa19785dd8..1f35b6fc468b2 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -20,18 +20,36 @@ define_property(TARGET PROPERTY O2_GPU_KERNEL_FILES) set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/GPU/include_gpu_onthefly") file(MAKE_DIRECTORY ${O2_GPU_KERNEL_WRAPPER_FOLDER}) set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../") -function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) - math(EXPR TMP_CHK "${ARGC} & 1") - if(${TMP_CHK}) - message(FATAL_ERROR "Invalid number of arguments to kernel ${TMP_CHK}, must be odd to have pairs of argument type, argument name") - endif() +function(o2_gpu_add_kernel kernel_name kernel_files) list(LENGTH ARGV n) + if(${n} GREATER 2) + set(kernel_options "${ARGV2}") + else() + set(kernel_options "") + endif() + if(kernel_options MATCHES "^LB") + set(TMP_BOUNDS "_LB") + elseif(kernel_options MATCHES "^NO" OR kernel_options STREQUAL "") + set(TMP_BOUNDS "") + else() + message(FATAL_ERROR "Invalid kernel options, must contain bounds at first") + 
endif() + string(LENGTH "${kernel_options}" TMP_CHK) + if(${TMP_CHK} GREATER 3) + string(SUBSTRING "${kernel_options}" 3 -1 kernel_extra) + else() + set(kernel_extra "") + endif() set(OPT1 "") set(OPT2 "") set(OPT3 "") - if(${n} GREATER 4) + if(${n} GREATER 3) + math(EXPR TMP_CHK "${ARGC} & 1") + if(NOT ${TMP_CHK}) + message(FATAL_ERROR "Invalid number of arguments to kernel ${ARGC}, must be odd to have pairs of argument type, argument name") + endif() math(EXPR n "${n} - 1") - foreach(i RANGE 4 ${n} 2) + foreach(i RANGE 3 ${n} 2) math(EXPR j "${i} + 1") if(${ARGV${i}} MATCHES "\\*$") string(APPEND OPT1 ",GPUPtr1(${ARGV${i}},${ARGV${j}})") @@ -43,16 +61,9 @@ function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) string(APPEND OPT3 ",${ARGV${i}}") endforeach() endif() - if(kernel_bounds MATCHES "^LB") - set(TMP_BOUNDS "_LB") - elseif(kernel_bounds MATCHES "^NO") - set(TMP_BOUNDS "") - else() - message(FATAL_ERROR "Invalid bounds") - endif() set(TMP_PRE "") set(TMP_POST "") - set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_type}), (${OPT1}), (${OPT2}), (${OPT3}))\n") + set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_extra}), (${OPT1}), (${OPT2}), (${OPT3}))\n") separate_arguments(kernel_files NATIVE_COMMAND ${kernel_files}) list(GET kernel_files 0 TMP_KERNEL_CLASS_FILE) if (TMP_KERNEL_CLASS_FILE STREQUAL "=") diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index a823fc853e5b1..4b7aab75519fa 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -27,100 +27,100 @@ o2_gpu_kernel_file_list(TPCCLUSTERFINDER ERRORS ClusterAccumulator.cxx) o2_gpu_kernel_file_list(TRDTRACKER GPUTRDTrack.cxx GPUTRDTracker.cxx GPUTRDTrackletWord.cxx GeometryBase.cxx) o2_gpu_kernel_file_list(GLOBALREFIT TPCMERGER O2PROPAGATOR MATLUT GPUTrackingRefit.cxx) -o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= 
TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO single int32_t n) -o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) -o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER" NO single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER" NO single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER" NO single) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) 
-o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB simple int32_t mode) -o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB simple int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t id) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB simple int8_t useOrigTrackParam int8_t mergeAll) -o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB simple int8_t output) -o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t border0 int32_t border1 int8_t useOrigTrackParam) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, 
variant" "GPUTPCGMMergerGPU TPCMERGER" NO simple gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) -o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER" NO simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER" NO simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER" NO simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER" NO simple) -o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB simple GPUTRDTrackerGPU* externalInstance) -o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER 
MATLUT O2PROPAGATOR" LB simple GPUTRDTracker* externalInstance) -o2_gpu_add_kernel("GPUITSFitterKernels" "= TPCMERGER MATLUT" LB simple) -o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB simple int32_t trackStart int32_t trackEnd) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB simple int32_t sectorStart int32_t nSectors) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB single int8_t setPositions) -o2_gpu_add_kernel("GPUTPCCFPeakFinder" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" 
"= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB single int8_t onlyMC) -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER" NO single) -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO single GPUTPCLinearLabels* out) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB single int32_t iBuf int32_t stage) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB single int32_t iBuf int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= TPCCLUSTERFINDER" LB single int32_t iBuf int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPCCLUSTERFINDER" LB single int32_t iBuf "uint32_t" offset int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB single int32_t iBuf int32_t stage ChargePos* in ChargePos* out) -o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB single int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB single int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB single int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFGather" "=" LB single o2::tpc::ClusterNative* dest) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB simple) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB simple) +o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB) 
+o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" "NO_REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" "NO_REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO int32_t n) +o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB) +o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB) +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) +o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode) +o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" 
"GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t id) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t useOrigTrackParam int8_t mergeAll) +o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t output) +o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t border0 int32_t border1 int8_t useOrigTrackParam) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) +o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER") 
+o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER") +o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB GPUTRDTrackerGPU* externalInstance) +o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER MATLUT O2PROPAGATOR" LB GPUTRDTracker* externalInstance) +o2_gpu_add_kernel("GPUITSFitterKernels" "= TPCMERGER MATLUT" LB) +o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" 
"GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB int32_t trackStart int32_t trackEnd) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB int32_t sectorStart int32_t nSectors) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB int8_t setPositions) +o2_gpu_add_kernel("GPUTPCCFPeakFinder" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB int8_t onlyMC) +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO GPUTPCLinearLabels* out) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= 
TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPCCLUSTERFINDER" LB int32_t iBuf "uint32_t" offset int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage ChargePos* in ChargePos* out) +o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) From 4795ce795401ca4e8c40ca150da0f820e6001150 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 22:12:24 +0100 Subject: [PATCH 0049/1764] GPU: Improve kernel attribute resolution preprocessor logic --- .../Base/GPUReconstructionKernelMacros.h | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index f80b324970dc9..cd1180cbc9991 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -35,26 +35,24 @@ #ifndef GPUCA_KRNL_REG #define GPUCA_KRNL_REG(...) #endif -#define GPUCA_KRNL_REG_INTERNAL_PROP(...) GPUCA_M_STRIP(__VA_ARGS__) #ifndef GPUCA_KRNL_CUSTOM #define GPUCA_KRNL_CUSTOM(...) #endif -#define GPUCA_KRNL_CUSTOM_INTERNAL_PROP(...) -#define GPUCA_ATTRRES_REG(XX, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, XX))(num) GPUCA_ATTRRES2(XX, __VA_ARGS__) -#define GPUCA_ATTRRES2_REG(XX, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, XX))(num) GPUCA_ATTRRES3(XX, __VA_ARGS__) -#define GPUCA_ATTRRES_CUSTOM(XX, custom, args, ...) 
GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, XX))(args) GPUCA_ATTRRES2(XX, __VA_ARGS__) -#define GPUCA_ATTRRES2_CUSTOM(XX, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, XX))(args) GPUCA_ATTRRES3(XX, __VA_ARGS__) -#define GPUCA_ATTRRES_NONE(XX, ...) -#define GPUCA_ATTRRES2_NONE(XX, ...) -#define GPUCA_ATTRRES_(XX, ...) -#define GPUCA_ATTRRES2_(XX, ...) -#define GPUCA_ATTRRES3(XX) // 3 attributes not supported -#define GPUCA_ATTRRES2(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES2_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) -#define GPUCA_ATTRRES(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) +#define GPUCA_KRNL_REG_EXTRREG(...) GPUCA_M_STRIP(__VA_ARGS__) +#define GPUCA_KRNL_CUSTOM_EXTRREG(MODE, ...) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__) +#define GPUCA_KRNL_NONE_EXTRREG(MODE, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_REG(MODE, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, MODE))(num) GPUCA_ATTRRES_XREG (MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_CUSTOM(MODE, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, MODE))(args) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_NONE(MODE, none, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_(MODE, ...) +#define GPUCA_ATTRRES_XNONE(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_XCUSTOM(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_XREG(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) // GPU Kernel entry point #define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) 
\ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) + GPUg() void GPUCA_ATTRRES(, GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) #ifdef GPUCA_KRNL_DEFONLY #define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__); @@ -83,7 +81,7 @@ #define GPUCA_KRNL_PROP(x_class, x_attributes) \ template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend() { \ - gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_STRIP(x_attributes))}; \ + gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \ return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ } From ccabdf2e66a38222b26dd143c053707341bf2768 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 22:23:47 +0100 Subject: [PATCH 0050/1764] GPU: Remove obsolete tracketConstructorWithoutPipeline option --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 6 --- .../Base/GPUReconstructionIncludes.h | 6 --- .../Definitions/GPUDefGPUParameters.h | 16 -------- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 - .../Global/GPUChainTrackingSectorTracker.cxx | 37 +++++++------------ .../SectorTracker/GPUTPCTracker.cxx | 2 +- 6 files changed, 15 insertions(+), 54 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 4b767a6b8a8f7..656fa37fb6a4c 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -270,12 +270,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } if (mProcessingSettings.deterministicGPUReconstruction && 
mProcessingSettings.debugLevel >= 6) { mProcessingSettings.nTPCClustererLanes = 1; - if (mProcessingSettings.trackletConstructorInPipeline < 0) { - mProcessingSettings.trackletConstructorInPipeline = 1; - } - if (mProcessingSettings.trackletSelectorSectors < 0) { - mProcessingSettings.trackletSelectorSectors = 1; - } } if (mProcessingSettings.createO2Output > 1 && mProcessingSettings.runQA && mProcessingSettings.qcRunFraction == 100.f) { mProcessingSettings.createO2Output = 1; diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index f18ab21dc3972..6aba7e30a49d7 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -30,12 +30,6 @@ #include #define GPUCA_GPUReconstructionUpdateDefaults() \ - if (mProcessingSettings.trackletConstructorInPipeline < 0) { \ - mProcessingSettings.trackletConstructorInPipeline = GPUCA_CONSTRUCTOR_IN_PIPELINE; \ - } \ - if (mProcessingSettings.trackletSelectorSectors < 0) { \ - mProcessingSettings.trackletSelectorSectors = GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT; \ - } \ if (mProcessingSettings.alternateBorderSort < 0) { \ mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \ } \ diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 772b4684b590e..3ed6c25762405 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -84,11 +84,9 @@ #define GPUCA_LB_COMPRESSION_GATHER 1024 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -149,11 +147,9 @@ #define GPUCA_LB_COMPRESSION_GATHER 1024 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -214,11 +210,9 @@ #define GPUCA_LB_COMPRESSION_GATHER 1024 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -271,11 +265,9 @@ #define GPUCA_LB_COMPRESSION_GATHER 1024 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_COMP_GATHER_KERNEL 4 #define GPUCA_COMP_GATHER_MODE 3 @@ -530,9 +522,6 @@ #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 #endif - #ifndef GPUCA_CONSTRUCTOR_IN_PIPELINE - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #endif #ifndef GPUCA_ALTERNATE_BORDER_SORT #define GPUCA_ALTERNATE_BORDER_SORT 0 #endif @@ -542,9 +531,6 @@ #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 #endif - #ifndef GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT - #define 
GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 8 // Currently must be smaller than avaiable MultiProcessors on GPU or will result in wrong results - #endif #ifndef GPUCA_COMP_GATHER_KERNEL #define GPUCA_COMP_GATHER_KERNEL 0 #endif @@ -554,11 +540,9 @@ #else #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 0 #define GPUCA_SORT_BEFORE_FIT 0 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_THREAD_COUNT_FINDER 1 #define GPUCA_COMP_GATHER_KERNEL 0 #define GPUCA_COMP_GATHER_MODE 0 diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 5663aed8033b7..9b6be7743e485 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -257,8 +257,6 @@ AddOption(autoAdjustHostThreads, bool, true, "", 0, "Auto-adjust number of OMP t AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues") AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)") AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") -AddOption(trackletSelectorSectors, int8_t, -1, "", 0, "Number of sectors to processes in parallel at max") -AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline") AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit") AddOption(mergerSortTracks, int8_t, -1, "", 0, "Sort track indizes for GPU track fit") AddOption(alternateBorderSort, int8_t, -1, "", 0, "Alternative implementation for sorting of border tracks") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 
4b9b8c33a0887..0eb8af6a6a006 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -218,27 +218,23 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() AllocateRegisteredMemory(trk.MemoryResOutput()); } - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { - runKernel({GetGridAuto(useStream), {iSector}}); - DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - trk.DumpHitWeights(*mDebugFile); - } + runKernel({GetGridAuto(useStream), {iSector}}); + DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + trk.DumpHitWeights(*mDebugFile); } - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { - runKernel({GetGridAuto(useStream), {iSector}}); - runKernel({{1, -ThreadCount(), useStream}, {iSector}}, 1); - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSector}}); - } - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, trk.MemoryResCommon(), useStream, &mEvents->sector[iSector]); - streamMap[iSector] = useStream; - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); - } - DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + runKernel({GetGridAuto(useStream), {iSector}}); + runKernel({{1, -ThreadCount(), useStream}, {iSector}}, 1); + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {iSector}}); } + 
TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, trk.MemoryResCommon(), useStream, &mEvents->sector[iSector]); + streamMap[iSector] = useStream; + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); + } + DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); }); mRec->SetNActiveThreadsOuterLoop(1); if (error) { @@ -280,11 +276,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().keepAllMemory) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true); - if (!GetProcessingSettings().trackletConstructorInPipeline) { - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - processors()->tpcTrackers[iSector].DumpHitWeights(*mDebugFile); - } - } } if (transferRunning[iSector]) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 28521b2987a45..4e815784f7cad 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -93,7 +93,7 @@ void* GPUTPCTracker::SetPointersCommon(void* mem) void GPUTPCTracker::RegisterMemoryAllocation() { AllocateAndInitializeLate(); - bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletConstructorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); + bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, 
GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSectorLinks", reLinks); mMemoryResSectorScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSectorScratch"); From 22a9b80b1c0932cf3c7c11d9a20b197cae11537a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 13:40:43 +0100 Subject: [PATCH 0051/1764] Simplify GPUCAMath a bit --- GPU/Common/GPUCommonMath.h | 191 +++++++----------- .../GPUTPCCompressionKernels.cxx | 2 +- GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 4 +- .../display/render/GPUDisplayDraw.cxx | 2 +- 4 files changed, 80 insertions(+), 119 deletions(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 5a813b74ed7b6..f4b9cd945799a 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -33,6 +33,15 @@ #include #endif +// GPUCA_CHOICE Syntax: GPUCA_CHOICE(Host, CUDA&HIP, OpenCL) +#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) // clang-format off + #define GPUCA_CHOICE(c1, c2, c3) (c2) // Select second option for CUDA and HIP +#elif defined(GPUCA_GPUCODE_DEVICE) && defined (__OPENCL__) + #define GPUCA_CHOICE(c1, c2, c3) (c3) // Select third option for OpenCL +#else + #define GPUCA_CHOICE(c1, c2, c3) (c1) // Select first option for Host +#endif // clang-format on + namespace o2 { namespace gpu @@ -44,9 +53,15 @@ class GPUCommonMath GPUd() static float2 MakeFloat2(float x, float y); // TODO: Find better appraoch that is constexpr template - GPUhd() static T Min(const T x, const T y); + GPUhd() static T Min(const T x, const T y) + { + return GPUCA_CHOICE(std::min(x, y), min(x, y), min(x, y)); + } template - GPUhd() 
static T Max(const T x, const T y); + GPUhd() static T Max(const T x, const T y) + { + return GPUCA_CHOICE(std::max(x, y), max(x, y), max(x, y)); + } template GPUd() static T MinWithRef(T x, T y, S refX, S refY, R& r); template @@ -74,28 +89,28 @@ class GPUCommonMath GPUd() static float Pow(float x, float y); GPUd() static float Log(float x); GPUd() static float Exp(float x); - GPUhdni() static float Copysign(float x, float y); + GPUhdni() static float Copysign(float x, float y) { return GPUCA_CHOICE(std::copysignf(x, y), copysignf(x, y), copysign(x, y)); } GPUd() static constexpr float TwoPi() { return 6.2831853f; } GPUd() static constexpr float Pi() { return 3.1415927f; } GPUd() static float Round(float x); - GPUd() static float Floor(float x); + GPUd() static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); } GPUd() static uint32_t Float2UIntReint(const float& x); - GPUd() static uint32_t Float2UIntRn(float x); + GPUd() static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } GPUd() static int32_t Float2IntRn(float x); GPUd() static float Modf(float x, float y); - GPUd() static bool Finite(float x); - GPUd() static bool IsNaN(float x); + GPUd() static bool Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } + GPUd() static bool IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } GPUd() static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH GPUd() static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH - GPUd() static float QuietNaN(); + GPUd() static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } GPUd() static uint32_t Clz(uint32_t val); GPUd() static uint32_t Popcount(uint32_t val); GPUd() static void memcpy(void* dst, const void* src, size_t size); - GPUhdni() static float Hypot(float x, float y); - GPUhdni() static float Hypot(float x, float y, float z); - 
GPUhdni() static float Hypot(float x, float y, float z, float w); + GPUhdi() static float Hypot(float x, float y) { return Sqrt(x * x + y * y); } + GPUhdi() static float Hypot(float x, float y, float z) { return Sqrt(x * x + y * y + z * z); } + GPUhdi() static float Hypot(float x, float y, float z, float w) { return Sqrt(x * x + y * y + z * z + w * w); } template GPUhd() static void Swap(T& a, T& b); @@ -154,15 +169,7 @@ class GPUCommonMath GPUd() constexpr static T nextMultipleOf(T val); template - GPUdi() static float Sum2(float w, Args... args) - { - if constexpr (sizeof...(Args) == 0) { - return w * w; - } else { - return w * w + Sum2(args...); - } - return 0; - } + GPUhdni() static float Sum2(float w, Args... args); private: template @@ -179,14 +186,16 @@ class GPUCommonMath typedef GPUCommonMath CAMath; -// CHOICE Syntax: CHOICE(Host, CUDA&HIP, OpenCL) -#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) // clang-format off - #define CHOICE(c1, c2, c3) (c2) // Select second option for CUDA and HIP -#elif defined(GPUCA_GPUCODE_DEVICE) && defined (__OPENCL__) - #define CHOICE(c1, c2, c3) (c3) // Select third option for OpenCL -#else - #define CHOICE(c1, c2, c3) (c1) // Select first option for Host -#endif // clang-format on +template +GPUhdi() float GPUCommonMath::Sum2(float w, Args... 
args) +{ + if constexpr (sizeof...(Args) == 0) { + return w * w; + } else { + return w * w + Sum2(args...); + } + return 0; +} GPUdi() void GPUCommonMath::memcpy(void* dst, const void* src, size_t size) { @@ -230,7 +239,7 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y) #endif // GPUCA_GPUCODE } -GPUdi() float GPUCommonMath::Modf(float x, float y) { return CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } +GPUdi() float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) { @@ -243,42 +252,36 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) #endif } -GPUdi() uint32_t GPUCommonMath::Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } -GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); } - -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } -GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), isnan(x)); } -GPUdi() float GPUCommonMath::QuietNaN() { return CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } #ifdef GPUCA_NO_FAST_MATH -GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); } +GPUdi() float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } -GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } -GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } -GPUdi() float GPUCommonMath::Sin(float x) { return CHOICE((float)sin((double)x), 
(float)sin((double)x), sin(x)); } -GPUdi() float GPUCommonMath::Cos(float x) { return CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); } -GPUdi() float GPUCommonMath::Tan(float x) { return CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); } -GPUdi() float GPUCommonMath::Pow(float x, float y) { return CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); } -GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); } -GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } -GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x), (float)log((double)x), log(x)); } -GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } +GPUhdi() float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } +GPUdi() float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } +GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } +GPUdi() float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); } +GPUdi() float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); } +GPUdi() float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); } +GPUdi() float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); } +GPUdi() float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); } +GPUdi() float GPUCommonMath::ACos(float x) { return 
GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } +GPUdi() float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); } +GPUdi() float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } #else -GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); } -GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } -GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } -GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); } -GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } -GPUdi() float GPUCommonMath::Sin(float x) { return CHOICE(sinf(x), sinf(x), sin(x)); } -GPUdi() float GPUCommonMath::Cos(float x) { return CHOICE(cosf(x), cosf(x), cos(x)); } -GPUdi() float GPUCommonMath::Tan(float x) { return CHOICE(tanf(x), tanf(x), tan(x)); } -GPUdi() float GPUCommonMath::Pow(float x, float y) { return CHOICE(powf(x, y), powf(x, y), pow(x, y)); } -GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE(asinf(x), asinf(x), asin(x)); } -GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE(acosf(x), acosf(x), acos(x)); } -GPUdi() float GPUCommonMath::Log(float x) { return CHOICE(logf(x), logf(x), log(x)); } -GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE(expf(x), expf(x), exp(x)); } +GPUdi() float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } +GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } +GPUhdi() float GPUCommonMath::Sqrt(float x) 
{ return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } +GPUdi() float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE(atanf(x), atanf(x), atan(x)); } +GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } +GPUdi() float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE(sinf(x), sinf(x), sin(x)); } +GPUdi() float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE(cosf(x), cosf(x), cos(x)); } +GPUdi() float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE(tanf(x), tanf(x), tan(x)); } +GPUdi() float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE(powf(x, y), powf(x, y), pow(x, y)); } +GPUdi() float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf(x), asinf(x), asin(x)); } +GPUdi() float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); } +GPUdi() float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); } +GPUdi() float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); } GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return true; } GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } #endif @@ -293,7 +296,7 @@ GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) #elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE)) sincosf(x, &s, &c); #else - CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c)); + GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c)); #endif } @@ -304,14 +307,14 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) #elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE)) sincos(x, &s, &c); #else - CHOICE((void)((s = sin(x)) + (c = cos(x))), sincos(x, &s, &c), s = sincos(x, &c)); + GPUCA_CHOICE((void)((s = sin(x)) + (c = cos(x))), sincos(x, &s, &c), 
s = sincos(x, &c)); #endif } GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) { #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) - return x == 0 ? 32 : CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available + return x == 0 ? 32 : GPUCA_CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available #else for (int32_t i = 31; i >= 0; i--) { if (x & (1u << i)) { @@ -326,7 +329,7 @@ GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) { #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed // use builtin if available - return CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x)); + return GPUCA_CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x)); #else x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); @@ -334,45 +337,16 @@ GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) #endif } -GPUhdi() float GPUCommonMath::Hypot(float x, float y) -{ - return Sqrt(x * x + y * y); -} - -GPUhdi() float GPUCommonMath::Hypot(float x, float y, float z) -{ - return Sqrt(x * x + y * y + z * z); -} - -GPUhdi() float GPUCommonMath::Hypot(float x, float y, float z, float w) -{ - return Sqrt(x * x + y * y + z * z + w * w); -} - template -GPUd() void _swap(T& a, T& b) +GPUhdi() void GPUCommonMath::Swap(T& a, T& b) { +#ifndef GPUCA_GPUCODE_DEVICE + std::swap(a, b); +#else T tmp = a; a = b; b = tmp; -} - -template -GPUhdi() void GPUCommonMath::Swap(T& a, T& b) -{ - CHOICE(std::swap(a, b), _swap(a, b), _swap(a, b)); -} - -template -GPUhdi() T GPUCommonMath::Min(const T x, const T y) -{ - return CHOICE(std::min(x, y), min(x, y), min(x, y)); -} - -template -GPUhdi() T GPUCommonMath::Max(const T x, const T y) -{ - return CHOICE(std::max(x, y), max(x, y), max(x, y)); +#endif } template @@ -441,32 +415,19 @@ GPUdi() float 
GPUCommonMath::InvSqrt(float _x) template <> GPUhdi() float GPUCommonMath::Abs(float x) { - return CHOICE(fabsf(x), fabsf(x), fabs(x)); + return GPUCA_CHOICE(fabsf(x), fabsf(x), fabs(x)); } -#if !defined(__OPENCL__) || defined(cl_khr_fp64) template <> GPUhdi() double GPUCommonMath::Abs(double x) { - return CHOICE(fabs(x), fabs(x), fabs(x)); + return GPUCA_CHOICE(fabs(x), fabs(x), fabs(x)); } -#endif template <> GPUhdi() int32_t GPUCommonMath::Abs(int32_t x) { - return CHOICE(abs(x), abs(x), abs(x)); -} - -GPUhdi() float GPUCommonMath::Copysign(float x, float y) -{ -#if defined(__OPENCL__) - return copysign(x, y); -#elif defined(GPUCA_GPUCODE) && !defined(__OPENCL__) - return copysignf(x, y); -#else - return std::copysignf(x, y); -#endif // GPUCA_GPUCODE + return GPUCA_CHOICE(abs(x), abs(x), abs(x)); } template @@ -579,7 +540,7 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt } #endif -#undef CHOICE +#undef GPUCA_CHOICE } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 966bffa963c7e..4831be9b12bcc 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -199,7 +199,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->nClusters[iSector][iRow]); + const uint32_t nn = CAMath::nextMultipleOf(clusters->nClusters[iSector][iRow]); for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) { const int32_t idx = idOffset + i; int32_t cidx = 0; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index 1bd2eca769913..fa0711887f60f 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -787,8 +787,8 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK if (mHypothesis[iUpdate + 
hypothesisIdxOffset].mTrackletId == trkltIdx) { continue; } - if (GPUCommonMath::Abs(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetZbin() - tracklets[trkltIdx].GetZbin()) == 1 && - GPUCommonMath::Abs(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetY() - tracklets[trkltIdx].GetY()) < 1) { + if (CAMath::Abs(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetZbin() - tracklets[trkltIdx].GetZbin()) == 1 && + CAMath::Abs(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetY() - tracklets[trkltIdx].GetY()) < 1) { trkWork->setIsCrossingNeighbor(iLayer); trkWork->setHasNeighbor(); break; diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 188df5467e83d..24668c576d795 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -64,7 +64,7 @@ inline void GPUDisplay::drawPointLinestrip(int32_t iSector, int32_t cid, int32_t mVertexBuffer[iSector].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPos[cid].z); float curVal; while ((curVal = mGlobalPos[cid].w) < id_limit) { - if (GPUCommonMath::AtomicCAS(&mGlobalPos[cid].w, curVal, (float)id)) { + if (CAMath::AtomicCAS(&mGlobalPos[cid].w, curVal, (float)id)) { break; } curVal = mGlobalPos[cid].w; From c68243887320761f3a8ca2526403808a8fb2b7f0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 15:57:21 +0100 Subject: [PATCH 0052/1764] GPU TPC: Get rid of duplicate ReadEvent code path for initializing tracking data on CPU --- GPU/GPUTracking/Global/GPUChainTracking.h | 1 - .../Global/GPUChainTrackingSectorTracker.cxx | 26 +++---------------- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 4a2778851e517..e7d6f420b9c4d 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -220,7 +220,6 @@ class GPUChainTracking : public GPUChain GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS); - int32_t ReadEvent(uint32_t iSector, int32_t threadId); void WriteOutput(int32_t iSector, int32_t threadId); int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 0eb8af6a6a006..efb3deb257a42 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -158,17 +158,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Creating Sector Data (Sector %d)", iSector); } - if (doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); - streamInit[useStream] = true; - } else { - if (ReadEvent(iSector, 0)) { - GPUError("Error reading event"); - error = 1; - return; - } - } + TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); + runKernel({doGPU ? GetGridBlk(GPUCA_ROW_COUNT, useStream) : GetGridAuto(0), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); // TODO: Check why GetGridAuto(0) is much fast on CPU + streamInit[useStream] = true; if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}}); } @@ -381,18 +373,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() return 0; } -int32_t GPUChainTracking::ReadEvent(uint32_t iSector, int32_t threadId) -{ - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Running ReadEvent for sector %d on thread %d\n", iSector, threadId); - } - runKernel({{GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU)}, {iSector}}); - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Finished ReadEvent for sector %d on thread %d\n", iSector, threadId); - } - return (0); -} - void GPUChainTracking::WriteOutput(int32_t iSector, int32_t threadId) { if (GetProcessingSettings().debugLevel >= 5) { From 5848069446af06802fc6221868e9d075bba3d257 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 16:24:19 +0100 Subject: [PATCH 0053/1764] GPU TPC: Remove option to write out / start from obsolete TPC sector track data format --- GPU/GPUTracking/Base/GPUReconstruction.h | 1 - GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 1 - .../Base/GPUReconstructionDeviceBase.cxx | 1 - GPU/GPUTracking/CMakeLists.txt | 1 - GPU/GPUTracking/DataTypes/GPUDataTypes.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 9 +- GPU/GPUTracking/Global/GPUChainTracking.h | 3 +- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 1 - .../Global/GPUChainTrackingSectorTracker.cxx | 36 +---- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 1 
- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 1 - GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 1 - .../SectorTracker/GPUTPCSectorOutput.cxx | 57 -------- .../SectorTracker/GPUTPCSectorOutput.h | 79 ----------- .../SectorTracker/GPUTPCTracker.cxx | 123 +----------------- GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 9 -- .../SectorTracker/GPUTPCTrackerDump.cxx | 17 --- .../Standalone/Benchmark/standalone.cxx | 1 - 18 files changed, 11 insertions(+), 333 deletions(-) delete mode 100644 GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx delete mode 100644 GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 529cce2bd087f..a0248180a5e2c 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -31,7 +31,6 @@ #include "GPUOutputControl.h" #include "GPUMemoryResource.h" #include "GPUConstantMem.h" -#include "GPUTPCSectorOutput.h" #include "GPULogging.h" namespace o2::its diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index f5d350b4064d0..f397fc51bd407 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -18,7 +18,6 @@ #include "GPUChain.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 64d9351b447e2..d1091f59b784a 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -16,7 +16,6 @@ #include "GPUReconstructionIncludes.h" #include "GPUTPCTracker.h" -#include "GPUTPCSectorOutput.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 
b65674a68e6aa..c97742ac1d47f 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -37,7 +37,6 @@ set(SRCS SectorTracker/GPUTPCNeighboursCleaner.cxx SectorTracker/GPUTPCTracker.cxx SectorTracker/GPUTPCTrackingData.cxx - SectorTracker/GPUTPCSectorOutput.cxx SectorTracker/GPUTPCTrackletConstructor.cxx SectorTracker/GPUTPCSectorDebugSortKernels.cxx SectorTracker/GPUTPCCreateOccupancyMap.cxx diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 51b5c0b101537..f7bfe38be988d 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -139,7 +139,7 @@ class GPUDataTypes AllRecoSteps = 0x7FFFFFFF, NoRecoStep = 0 }; enum ENUM_CLASS InOutType { TPCClusters = 1, - TPCSectorTracks = 2, + OBSOLETE = 2, TPCMergedTracks = 4, TPCCompressedClusters = 8, TRDTracklets = 16, diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 0e1cde343135e..c186f916891ba 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -20,7 +20,6 @@ #include "GPUChainTracking.h" #include "GPUChainTrackingDefs.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" @@ -185,7 +184,7 @@ bool GPUChainTracking::ValidateSteps() GPUError("Invalid input, TPC Clusterizer needs TPC raw input"); return false; } - if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && ((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion))) { + if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion)) { GPUError("Invalid input / output / step, merger cannot read/store sectors tracks and 
needs TPC conversion"); return false; } @@ -204,7 +203,7 @@ bool GPUChainTracking::ValidateSteps() GPUError("Missing input for TPC Cluster conversion / sector tracking / compression / dEdx: TPC Clusters required"); return false; } - if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking))) { + if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { GPUError("Input for TPC merger missing"); return false; } @@ -220,10 +219,6 @@ bool GPUChainTracking::ValidateSteps() GPUError("TPC Raw / TPC Clusters / TRD Tracklets cannot be output"); return false; } - if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { - GPUError("No TPC Sector Tracker Output available"); - return false; - } if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCMergedTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) { GPUError("No TPC Merged Track Output available"); return false; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index e7d6f420b9c4d..194573981838e 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -220,7 +220,6 @@ class GPUChainTracking : public GPUChain GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS); - void WriteOutput(int32_t iSector, int32_t threadId); int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true); int32_t PrepareProfile(); @@ -280,7 +279,7 @@ class GPUChainTracking : public GPUChain // Synchronization and Locks eventStruct* mEvents = nullptr; volatile int32_t mSectorSelectorReady = 0; - std::array mWriteOutputDone; + std::array 
mExtrapolationTrackingDone; std::vector mOutputQueue; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index c4dddd4b8b88f..4f7846b852b98 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -15,7 +15,6 @@ #include "GPUChainTracking.h" #include "GPUReconstructionIO.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index efb3deb257a42..522ccbad47e59 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -242,9 +242,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() std::array transferRunning; transferRunning.fill(true); - if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { + if (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)) { // TODO: This seems pretty obsolete code path, can probably be removed. 
if (param().rec.tpc.extrapolationTracking) { - mWriteOutputDone.fill(0); + mExtrapolationTrackingDone.fill(0); } uint32_t tmpSector = 0; @@ -288,18 +288,15 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() uint32_t sectorLeft, sectorRight; GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight); - if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mWriteOutputDone[tmpSector2] == 0) { + if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mExtrapolationTrackingDone[tmpSector2] == 0) { ExtrapolationTracking(tmpSector2, 0); - WriteOutput(tmpSector2, 0); - mWriteOutputDone[tmpSector2] = 1; + mExtrapolationTrackingDone[tmpSector2] = 1; } } - } else { - WriteOutput(iSector, 0); } } } - if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.extrapolationTracking) { + if (param().rec.tpc.extrapolationTracking) { std::vector blocking(NSECTORS * mRec->NStreams()); for (int32_t i = 0; i < NSECTORS; i++) { for (int32_t j = 0; j < mRec->NStreams(); j++) { @@ -308,7 +305,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector); - if (!((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)))) { + if (!(doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { uint32_t sectorLeft, sectorRight; GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) { @@ -334,9 +331,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (param().rec.tpc.extrapolationTracking) { ExtrapolationTracking(iSector, 0); } - if (GetRecoStepsOutputs() & 
GPUDataTypes::InOutType::TPCSectorTracks) { - WriteOutput(iSector, 0); - } }); mRec->SetNActiveThreadsOuterLoop(1); } @@ -348,12 +342,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } } - if (GetProcessingSettings().debugMask & 1024 && !GetProcessingSettings().deterministicGPUReconstruction) { - for (uint32_t i = 0; i < NSECTORS; i++) { - processors()->tpcTrackers[i].DumpOutput(*mDebugFile); - } - } - if (DoProfile()) { return (1); } @@ -372,15 +360,3 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK")); return 0; } - -void GPUChainTracking::WriteOutput(int32_t iSector, int32_t threadId) -{ - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Running WriteOutput for sector %d on thread %d\n", iSector, threadId); - } - processors()->tpcTrackers[iSector].WriteOutputPrepare(); - processors()->tpcTrackers[iSector].WriteOutput(); - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Finished WriteOutput for sector %d on thread %d\n", iSector, threadId); - } -} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f6a50565bac52..fa85d796baeba 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -42,7 +42,6 @@ #include "GPUCommonConstants.h" #include "GPUTPCTrackParam.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCGMMergedTrack.h" #include "GPUParam.h" #include "GPUTPCTrackLinearisation.h" diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index ae6a2582d833a..506dd88ab7058 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -42,7 +42,6 @@ struct ClusterNative; namespace o2::gpu { class GPUTPCSectorTrack; -class GPUTPCSectorOutput; class GPUTPCGMTrackParam; class GPUTPCTracker; class GPUChainTracking; diff --git 
a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index d6dfcc8424e65..ae413aaa98648 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -25,7 +25,6 @@ #include "GPUO2DataTypes.h" #include "GPUCommonMath.h" #include "GPUTPCTrackParam.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCGMMergedTrack.h" #include "GPUParam.h" #include "GPUParam.inc" diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx deleted file mode 100644 index 864a5c6b7106e..0000000000000 --- a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCSectorOutput.cxx -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#include "GPUOutputControl.h" -#include "GPUTPCSectorOutput.h" -#include "GPUCommonMath.h" -#include - -using namespace o2::gpu; - -uint32_t GPUTPCSectorOutput::EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters) -{ - // calculate the amount of memory [bytes] needed for the event - return sizeof(GPUTPCSectorOutput) + sizeof(GPUTPCTrack) * nOfTracks + sizeof(GPUTPCSectorOutCluster) * nOfTrackClusters; -} - -#ifndef GPUCA_GPUCODE -void GPUTPCSectorOutput::Allocate(GPUTPCSectorOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory) -{ - // Allocate All memory needed for sector output - const size_t memsize = EstimateSize(nTracks, nTrackHits); - - if (outputControl && outputControl->useExternal()) { - static std::atomic_flag lock = ATOMIC_FLAG_INIT; - while (lock.test_and_set(std::memory_order_acquire)) { - } - outputControl->checkCurrent(); - if (outputControl->size - ((char*)outputControl->ptrCurrent - (char*)outputControl->ptrBase) < memsize) { - outputControl->size = 1; - ptrOutput = nullptr; - lock.clear(std::memory_order_release); - return; - } - ptrOutput = reinterpret_cast(outputControl->ptrCurrent); - outputControl->ptrCurrent = (char*)outputControl->ptrCurrent + memsize; - lock.clear(std::memory_order_release); - } else { - if (internalMemory) { - free(internalMemory); - } - internalMemory = malloc(memsize); - ptrOutput = reinterpret_cast(internalMemory); - } - ptrOutput->SetMemorySize(memsize); -} -#endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h deleted file mode 100644 index cc02206dc09a7..0000000000000 --- a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
-// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCSectorOutput.h -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#ifndef GPUTPCSECTOROUTPUT_H -#define GPUTPCSECTOROUTPUT_H - -#include "GPUTPCDef.h" -#include "GPUTPCTrack.h" - -namespace o2::gpu -{ -struct GPUOutputControl; - -/** - * @class GPUTPCSectorOutput - * - * GPUTPCSectorOutput class is used to store the output of GPUTPCTracker{Component} - * and transport the output to GPUTPCGBMerger{Component} - * - * The class contains all the necessary information about TPC tracks, reconstructed in one sector. - * This includes the reconstructed track parameters and some compressed information - * about the assigned clusters: clusterId, position and amplitude. 
- * - */ -class GPUTPCSectorOutput -{ - public: - GPUhd() uint32_t NTracks() const - { - return mNTracks; - } - GPUhd() uint32_t NLocalTracks() const { return mNLocalTracks; } - GPUhd() uint32_t NTrackClusters() const { return mNTrackClusters; } - GPUhd() const GPUTPCTrack* GetFirstTrack() const - { - return (const GPUTPCTrack*)((const char*)this + sizeof(*this)); - } - GPUhd() GPUTPCTrack* FirstTrack() - { - return (GPUTPCTrack*)((char*)this + sizeof(*this)); - } - GPUhd() size_t Size() const - { - return (mMemorySize); - } - - static uint32_t EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters); - static void Allocate(GPUTPCSectorOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory); - - GPUhd() void SetNTracks(uint32_t v) { mNTracks = v; } - GPUhd() void SetNLocalTracks(uint32_t v) { mNLocalTracks = v; } - GPUhd() void SetNTrackClusters(uint32_t v) { mNTrackClusters = v; } - - private: - GPUTPCSectorOutput() = delete; // NOLINT: Must be private or ROOT tries to use them! 
- ~GPUTPCSectorOutput() = delete; // NOLINT - GPUTPCSectorOutput(const GPUTPCSectorOutput&) = delete; // NOLINT - GPUTPCSectorOutput& operator=(const GPUTPCSectorOutput&) = delete; // NOLINT - - GPUhd() void SetMemorySize(size_t val) { mMemorySize = val; } - - uint32_t mNTracks; // number of reconstructed tracks - uint32_t mNLocalTracks; - uint32_t mNTrackClusters; // total number of track clusters - size_t mMemorySize; // Amount of memory really used -}; -} // namespace o2::gpu -#endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 4e815784f7cad..6c1b4eda0d7f5 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -18,7 +18,6 @@ #include "GPUCommonMath.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSectorOutput.h" #include "GPUO2DataTypes.h" #include "GPUTPCTrackParam.h" #include "GPUParam.inc" @@ -39,12 +38,7 @@ using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) -GPUTPCTracker::~GPUTPCTracker() -{ - if (mOutputMemory) { - free(mOutputMemory); - } -} +GPUTPCTracker::~GPUTPCTracker() = default; // ---------------------------------------------------------------------------------- void GPUTPCTracker::SetSector(int32_t iSector) { mISector = iSector; } @@ -173,124 +167,9 @@ GPUh() int32_t GPUTPCTracker::CheckEmptySector() // Check if the Sector is empty, if so set the output apropriate and tell the reconstuct procesdure to terminate if (NHitsTotal() < 1) { mCommonMem->nTracks = mCommonMem->nTrackHits = 0; - if (mOutput) { - WriteOutputPrepare(); - mOutput->SetNTracks(0); - mOutput->SetNTrackClusters(0); - } return 1; } return 0; } -GPUh() void GPUTPCTracker::WriteOutputPrepare() { GPUTPCSectorOutput::Allocate(mOutput, mCommonMem->nTracks, mCommonMem->nTrackHits, &mRec->OutputControl(), mOutputMemory); } - -template -static inline bool SortComparison(const T& a, const T& b) -{ - return (a.fSortVal < b.fSortVal); -} - -GPUh() void 
GPUTPCTracker::WriteOutput() -{ - mOutput->SetNTracks(0); - mOutput->SetNLocalTracks(0); - mOutput->SetNTrackClusters(0); - - if (mCommonMem->nTracks == 0) { - return; - } - if (mCommonMem->nTracks > GPUCA_MAX_SECTOR_NTRACK) { - GPUError("Maximum number of tracks exceeded, cannot store"); - return; - } - - int32_t nStoredHits = 0; - int32_t nStoredTracks = 0; - int32_t nStoredLocalTracks = 0; - - GPUTPCTrack* out = mOutput->FirstTrack(); - - trackSortData* trackOrder = new trackSortData[mCommonMem->nTracks]; - for (uint32_t i = 0; i < mCommonMem->nTracks; i++) { - trackOrder[i].fTtrack = i; - trackOrder[i].fSortVal = mTracks[trackOrder[i].fTtrack].NHits() / 1000.f + mTracks[trackOrder[i].fTtrack].Param().GetZ() * 100.f + mTracks[trackOrder[i].fTtrack].Param().GetY(); - } - std::sort(trackOrder, trackOrder + mCommonMem->nLocalTracks, SortComparison); // TODO: Check why this sorting affects the merging efficiency! - std::sort(trackOrder + mCommonMem->nLocalTracks, trackOrder + mCommonMem->nTracks, SortComparison); - - for (uint32_t iTrTmp = 0; iTrTmp < mCommonMem->nTracks; iTrTmp++) { - const int32_t iTr = trackOrder[iTrTmp].fTtrack; - GPUTPCTrack& iTrack = mTracks[iTr]; - - *out = iTrack; - int32_t nClu = 0; - int32_t iID = iTrack.FirstHitID(); - - for (int32_t ith = 0; ith < iTrack.NHits(); ith++) { - const GPUTPCHitId& ic = mTrackHits[iID + ith]; - int32_t iRow = ic.RowIndex(); - int32_t ih = ic.HitIndex(); - - const GPUTPCRow& row = mData.Row(iRow); - int32_t clusterIndex = mData.ClusterDataIndex(row, ih); -#ifdef GPUCA_ARRAY_BOUNDS_CHECKS - if (ih >= row.NHits() || ih < 0) { - GPUError("Array out of bounds access (Sector Row) (Hit %d / %d - NumC %d): Sector %d Row %d Index %d", ith, iTrack.NHits(), NHitsTotal(), mISector, iRow, ih); - fflush(stdout); - continue; - } - if (clusterIndex >= NHitsTotal() || clusterIndex < 0) { - GPUError("Array out of bounds access (Cluster Data) (Hit %d / %d - NumC %d): Sector %d Row %d Hit %d, Clusterdata Index %d", ith, 
iTrack.NHits(), NHitsTotal(), mISector, iRow, ih, clusterIndex); - fflush(stdout); - continue; - } -#endif - - float origX, origY, origZ; - uint8_t flags; - uint16_t amp; - int32_t id; - if (Param().par.earlyTpcTransform) { - origX = mData.ClusterData()[clusterIndex].x; - origY = mData.ClusterData()[clusterIndex].y; - origZ = mData.ClusterData()[clusterIndex].z; - flags = mData.ClusterData()[clusterIndex].flags; - amp = mData.ClusterData()[clusterIndex].amp; - id = mData.ClusterData()[clusterIndex].id; - } else { - const ClusterNativeAccess& cls = *mConstantMem->ioPtrs.clustersNative; - id = clusterIndex + cls.clusterOffset[mISector][0]; - GPUTPCConvertImpl::convert(*mConstantMem, mISector, iRow, cls.clustersLinear[id].getPad(), cls.clustersLinear[id].getTime(), origX, origY, origZ); - flags = cls.clustersLinear[id].getFlags(); - amp = cls.clustersLinear[id].qTot; - } - GPUTPCSectorOutCluster c; - c.Set(id, iRow, flags, amp, origX, origY, origZ); -#ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME - c.mPad = mData.ClusterData()[clusterIndex].pad; - c.mTime = mData.ClusterData()[clusterIndex].time; -#endif - out->SetOutTrackCluster(nClu, c); - nClu++; - } - - nStoredTracks++; - if (iTr < mCommonMem->nLocalTracks) { - nStoredLocalTracks++; - } - nStoredHits += nClu; - out->SetNHits(nClu); - out = out->NextTrack(); - } - delete[] trackOrder; - - mOutput->SetNTracks(nStoredTracks); - mOutput->SetNLocalTracks(nStoredLocalTracks); - mOutput->SetNTrackClusters(nStoredHits); - if (Param().par.debugLevel >= 3) { - GPUInfo("Sector %d, Output: Tracks %d, local tracks %d, hits %d", mISector, nStoredTracks, nStoredLocalTracks, nStoredHits); - } -} - #endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index 4a789b5adf6bf..e8aac872198f5 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -29,7 +29,6 @@ namespace o2::gpu { -class GPUTPCSectorOutput; struct 
GPUTPCClusterData; struct GPUParam; class GPUTPCTrack; @@ -50,8 +49,6 @@ class GPUTPCTracker : public GPUProcessor void InitializeRows(const GPUParam* param) { mData.InitializeRows(*param); } int32_t CheckEmptySector(); - void WriteOutputPrepare(); - void WriteOutput(); // Debugging Stuff void DumpTrackingData(std::ostream& out); // Dump Input Sector Data @@ -60,7 +57,6 @@ class GPUTPCTracker : public GPUProcessor void DumpHitWeights(std::ostream& out); //.... void DumpTrackHits(std::ostream& out); // Same for Track Hits void DumpTrackletHits(std::ostream& out); // Same for Track Hits - void DumpOutput(std::ostream& out); // Similar for output #endif struct StructGPUParameters { @@ -88,7 +84,6 @@ class GPUTPCTracker : public GPUProcessor return mData.ClusterData(); } GPUhdi() const GPUTPCRow& Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } - GPUhdi() GPUglobalref() GPUTPCSectorOutput* Output() const { return mOutput; } GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const { return (mCommonMem); @@ -268,10 +263,6 @@ class GPUTPCTracker : public GPUProcessor GPUglobalref() GPUTPCTrack* mTracks = nullptr; // reconstructed tracks GPUglobalref() GPUTPCHitId* mTrackHits = nullptr; // array of track hit numbers - // output - GPUglobalref() GPUTPCSectorOutput* mOutput; // address of pointer pointing to SectorOutput Object - void* mOutputMemory; // Pointer to output memory if stored internally - static int32_t StarthitSortComparison(const void* a, const void* b); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx index ba1727fa602a4..7d83ff9abd91c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx @@ -13,7 +13,6 @@ /// \author David Rohr #include "GPUTPCTracker.h" -#include "GPUTPCSectorOutput.h" #include "GPUReconstruction.h" #include "GPUTPCHitId.h" #include 
"GPUTPCTrack.h" @@ -26,22 +25,6 @@ using namespace o2::gpu; -void GPUTPCTracker::DumpOutput(std::ostream& out) -{ - if (Param().par.earlyTpcTransform) { - out << "\nSector " << mISector << "\n"; - const GPUTPCTrack* track = (Output())->GetFirstTrack(); - for (uint32_t j = 0; j < (Output())->NTracks(); j++) { - out << "Track " << j << " (" << track->NHits() << "): "; - for (int32_t k = 0; k < track->NHits(); k++) { - out << "(" << track->OutTrackCluster(k).GetX() << "," << track->OutTrackCluster(k).GetY() << "," << track->OutTrackCluster(k).GetZ() << ") "; - } - out << " - (" << track->Param().Y() << " " << track->Param().Z() << " " << track->Param().SinPhi() << " " << track->Param().DzDs() << " " << track->Param().QPt() << "\n"; - track = track->GetNextTrack(); - } - } -} - void GPUTPCTracker::DumpTrackingData(std::ostream& out) { // Dump Sector Input Data to File diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 682e6913d58d4..d6279df7c9188 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -416,7 +416,6 @@ int32_t SetupReconstruction() } steps.outputs.clear(); - steps.outputs.setBits(GPUDataTypes::InOutType::TPCSectorTracks, false); steps.outputs.setBits(GPUDataTypes::InOutType::TPCMergedTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TPCMerging)); steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCCompression)); steps.outputs.setBits(GPUDataTypes::InOutType::TRDTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking)); From ee009a4291823f61960932bbfde88597e60a6d6d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 16:42:27 +0100 Subject: [PATCH 0054/1764] GPU Math: Make constexpr what possible --- GPU/Common/GPUCommonMath.h | 154 ++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git 
a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index f4b9cd945799a..e977b3679a4ee 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -53,12 +53,12 @@ class GPUCommonMath GPUd() static float2 MakeFloat2(float x, float y); // TODO: Find better appraoch that is constexpr template - GPUhd() static T Min(const T x, const T y) + GPUhd() constexpr static T Min(const T x, const T y) { return GPUCA_CHOICE(std::min(x, y), min(x, y), min(x, y)); } template - GPUhd() static T Max(const T x, const T y) + GPUhd() constexpr static T Max(const T x, const T y) { return GPUCA_CHOICE(std::max(x, y), max(x, y), max(x, y)); } @@ -69,51 +69,51 @@ class GPUCommonMath template GPUd() static T MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, S refW, R& r); template - GPUdi() static T Clamp(const T v, const T lo, const T hi) + GPUdi() constexpr static T Clamp(const T v, const T lo, const T hi) { return Max(lo, Min(v, hi)); } - GPUhdni() static float Sqrt(float x); + GPUhdni() constexpr static float Sqrt(float x); GPUd() static float InvSqrt(float x); template - GPUhd() static T Abs(T x); - GPUd() static float ASin(float x); - GPUd() static float ACos(float x); - GPUd() static float ATan(float x); - GPUhd() static float ATan2(float y, float x); - GPUd() static float Sin(float x); - GPUd() static float Cos(float x); + GPUhd() constexpr static T Abs(T x); + GPUd() constexpr static float ASin(float x); + GPUd() constexpr static float ACos(float x); + GPUd() constexpr static float ATan(float x); + GPUhd() constexpr static float ATan2(float y, float x); + GPUd() constexpr static float Sin(float x); + GPUd() constexpr static float Cos(float x); GPUhdni() static void SinCos(float x, float& s, float& c); GPUhdni() static void SinCosd(double x, double& s, double& c); - GPUd() static float Tan(float x); - GPUd() static float Pow(float x, float y); - GPUd() static float Log(float x); - GPUd() static float Exp(float x); - GPUhdni() static float Copysign(float x, 
float y) { return GPUCA_CHOICE(std::copysignf(x, y), copysignf(x, y), copysign(x, y)); } - GPUd() static constexpr float TwoPi() { return 6.2831853f; } - GPUd() static constexpr float Pi() { return 3.1415927f; } - GPUd() static float Round(float x); - GPUd() static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); } - GPUd() static uint32_t Float2UIntReint(const float& x); - GPUd() static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } - GPUd() static int32_t Float2IntRn(float x); - GPUd() static float Modf(float x, float y); - GPUd() static bool Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } - GPUd() static bool IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } - GPUd() static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH - GPUd() static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH - GPUd() static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } - GPUd() static uint32_t Clz(uint32_t val); - GPUd() static uint32_t Popcount(uint32_t val); + GPUd() constexpr static float Tan(float x); + GPUd() constexpr static float Pow(float x, float y); + GPUd() constexpr static float Log(float x); + GPUd() constexpr static float Exp(float x); + GPUhdni() constexpr static float Copysign(float x, float y) { return GPUCA_CHOICE(std::copysignf(x, y), copysignf(x, y), copysign(x, y)); } + GPUd() constexpr static float TwoPi() { return 6.2831853f; } + GPUd() constexpr static float Pi() { return 3.1415927f; } + GPUd() constexpr static float Round(float x); + GPUd() constexpr static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); } + GPUd() static uint32_t Float2UIntReint(float x); + GPUd() constexpr static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } + GPUd() constexpr static int32_t Float2IntRn(float x); + GPUd() 
constexpr static float Modf(float x, float y); + GPUd() constexpr static bool Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } + GPUd() constexpr static bool IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } + GPUd() constexpr static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() constexpr static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() constexpr static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } + GPUd() constexpr static uint32_t Clz(uint32_t val); + GPUd() constexpr static uint32_t Popcount(uint32_t val); GPUd() static void memcpy(void* dst, const void* src, size_t size); - GPUhdi() static float Hypot(float x, float y) { return Sqrt(x * x + y * y); } - GPUhdi() static float Hypot(float x, float y, float z) { return Sqrt(x * x + y * y + z * z); } - GPUhdi() static float Hypot(float x, float y, float z, float w) { return Sqrt(x * x + y * y + z * z + w * w); } + GPUhdi() constexpr static float Hypot(float x, float y) { return Sqrt(x * x + y * y); } + GPUhdi() constexpr static float Hypot(float x, float y, float z) { return Sqrt(x * x + y * y + z * z); } + GPUhdi() constexpr static float Hypot(float x, float y, float z, float w) { return Sqrt(x * x + y * y + z * z + w * w); } template - GPUhd() static void Swap(T& a, T& b); + GPUhd() constexpr static void Swap(T& a, T& b); template GPUdi() static T AtomicExch(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val) @@ -162,14 +162,14 @@ class GPUCommonMath { GPUCommonMath::AtomicMinInternal(addr, val); } - GPUd() static int32_t Mul24(int32_t a, int32_t b); - GPUd() static float FMulRZ(float a, float b); + GPUd() constexpr static int32_t Mul24(int32_t a, int32_t b); + GPUd() constexpr static float FMulRZ(float a, float b); template GPUd() constexpr static T nextMultipleOf(T val); template - GPUhdni() static float Sum2(float w, 
Args... args); + GPUhdni() constexpr static float Sum2(float w, Args... args); private: template @@ -187,7 +187,7 @@ class GPUCommonMath typedef GPUCommonMath CAMath; template -GPUhdi() float GPUCommonMath::Sum2(float w, Args... args) +GPUhdi() constexpr float GPUCommonMath::Sum2(float w, Args... args) { if constexpr (sizeof...(Args) == 0) { return w * w; @@ -239,9 +239,9 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y) #endif // GPUCA_GPUCODE } -GPUdi() float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } +GPUdi() constexpr float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } -GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) +GPUdi() uint32_t GPUCommonMath::Float2UIntReint(float x) { #if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) return __float_as_uint(x); @@ -253,37 +253,37 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) } #ifdef GPUCA_NO_FAST_MATH -GPUdi() float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } -GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUhdi() float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } -GPUdi() float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } -GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } -GPUdi() float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); } -GPUdi() float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); } -GPUdi() float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE((float)tanf((double)x), (float)tanf((double)x), 
tan(x)); } -GPUdi() float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); } -GPUdi() float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); } -GPUdi() float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } -GPUdi() float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); } -GPUdi() float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } -GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } -GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } +GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } +GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } +GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } +GPUdi() constexpr float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } +GPUhdi() constexpr float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } +GPUdi() constexpr float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); } +GPUdi() constexpr float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); } +GPUdi() constexpr float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); } +GPUdi() constexpr float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); } 
+GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); } +GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } +GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); } +GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } +GPUdi() constexpr bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } +GPUdi() constexpr bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } #else -GPUdi() float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } -GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } -GPUhdi() float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } -GPUdi() float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE(atanf(x), atanf(x), atan(x)); } -GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } -GPUdi() float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE(sinf(x), sinf(x), sin(x)); } -GPUdi() float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE(cosf(x), cosf(x), cos(x)); } -GPUdi() float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE(tanf(x), tanf(x), tan(x)); } -GPUdi() float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE(powf(x, y), powf(x, y), pow(x, y)); } -GPUdi() float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf(x), asinf(x), asin(x)); } -GPUdi() float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); } -GPUdi() float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); } -GPUdi() float GPUCommonMath::Exp(float x) { return 
GPUCA_CHOICE(expf(x), expf(x), exp(x)); } -GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return true; } -GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } +GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } +GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } +GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } +GPUdi() constexpr float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE(atanf(x), atanf(x), atan(x)); } +GPUhdi() constexpr float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } +GPUdi() constexpr float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE(sinf(x), sinf(x), sin(x)); } +GPUdi() constexpr float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE(cosf(x), cosf(x), cos(x)); } +GPUdi() constexpr float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE(tanf(x), tanf(x), tan(x)); } +GPUdi() constexpr float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE(powf(x, y), powf(x, y), pow(x, y)); } +GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf(x), asinf(x), asin(x)); } +GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); } +GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); } +GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); } +GPUdi() constexpr bool GPUCommonMath::FiniteRelaxed(float x) { return true; } +GPUdi() constexpr bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } #endif GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) @@ -311,7 +311,7 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) #endif } -GPUdi() uint32_t 
GPUCommonMath::Clz(uint32_t x) +GPUdi() constexpr uint32_t GPUCommonMath::Clz(uint32_t x) { #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) return x == 0 ? 32 : GPUCA_CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available @@ -325,7 +325,7 @@ GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) #endif } -GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) +GPUdi() constexpr uint32_t GPUCommonMath::Popcount(uint32_t x) { #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed // use builtin if available @@ -338,7 +338,7 @@ GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) } template -GPUhdi() void GPUCommonMath::Swap(T& a, T& b) +GPUhdi() constexpr void GPUCommonMath::Swap(T& a, T& b) { #ifndef GPUCA_GPUCODE_DEVICE std::swap(a, b); @@ -413,19 +413,19 @@ GPUdi() float GPUCommonMath::InvSqrt(float _x) } template <> -GPUhdi() float GPUCommonMath::Abs(float x) +GPUhdi() constexpr float GPUCommonMath::Abs(float x) { return GPUCA_CHOICE(fabsf(x), fabsf(x), fabs(x)); } template <> -GPUhdi() double GPUCommonMath::Abs(double x) +GPUhdi() constexpr double GPUCommonMath::Abs(double x) { return GPUCA_CHOICE(fabs(x), fabs(x), fabs(x)); } template <> -GPUhdi() int32_t GPUCommonMath::Abs(int32_t x) +GPUhdi() constexpr int32_t GPUCommonMath::Abs(int32_t x) { return GPUCA_CHOICE(abs(x), abs(x), abs(x)); } From 938108f38d43deaf499bc2fe96cd765b4288825a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 17:09:16 +0100 Subject: [PATCH 0055/1764] GPU TPC: Make GPUTPCGeometry fully constexpr --- GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 166 +++++++++--------- .../TPCClusterFinder/ClusterAccumulator.h | 2 +- .../TPCClusterFinder/GPUTPCClusterFinder.h | 2 - 3 files changed, 86 insertions(+), 84 deletions(-) diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h 
b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index 461ac9366ca23..9ad83bff363ac 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -28,119 +28,123 @@ namespace o2::gpu { -// Copy of TPC constants from AliRoot:TPCGeometry / O2:TPC/Base/Mapper -// Should be unified, but cannot take the contants from the official headers for now, since we want it to be constexpr -class GPUTPCGeometry // TODO: Make values constexpr +// Copy of TPC constants from AliRoot:TPCGeometry / O2:TPC/Base/Mapper, cannot take the contants from the official headers for now, since we want it to be const / constexpr + +namespace gputpcgeometry_internal { #ifdef GPUCA_TPC_GEOMETRY_O2 - const float mX[GPUCA_ROW_COUNT] = {85.225f, 85.975f, 86.725f, 87.475f, 88.225f, 88.975f, 89.725f, 90.475f, 91.225f, 91.975f, 92.725f, 93.475f, 94.225f, 94.975f, 95.725f, 96.475f, 97.225f, 97.975f, 98.725f, 99.475f, 100.225f, 100.975f, - 101.725f, 102.475f, 103.225f, 103.975f, 104.725f, 105.475f, 106.225f, 106.975f, 107.725f, 108.475f, 109.225f, 109.975f, 110.725f, 111.475f, 112.225f, 112.975f, 113.725f, 114.475f, 115.225f, 115.975f, 116.725f, 117.475f, - 118.225f, 118.975f, 119.725f, 120.475f, 121.225f, 121.975f, 122.725f, 123.475f, 124.225f, 124.975f, 125.725f, 126.475f, 127.225f, 127.975f, 128.725f, 129.475f, 130.225f, 130.975f, 131.725f, 135.2f, 136.2f, 137.2f, - 138.2f, 139.2f, 140.2f, 141.2f, 142.2f, 143.2f, 144.2f, 145.2f, 146.2f, 147.2f, 148.2f, 149.2f, 150.2f, 151.2f, 152.2f, 153.2f, 154.2f, 155.2f, 156.2f, 157.2f, 158.2f, 159.2f, - 160.2f, 161.2f, 162.2f, 163.2f, 164.2f, 165.2f, 166.2f, 167.2f, 168.2f, 171.4f, 172.6f, 173.8f, 175.f, 176.2f, 177.4f, 178.6f, 179.8f, 181.f, 182.2f, 183.4f, 184.6f, 185.8f, - 187.f, 188.2f, 189.4f, 190.6f, 191.8f, 193.f, 194.2f, 195.4f, 196.6f, 197.8f, 199.f, 200.2f, 201.4f, 202.6f, 203.8f, 205.f, 206.2f, 209.65f, 211.15f, 212.65f, 214.15f, 215.65f, - 217.15f, 218.65f, 220.15f, 221.65f, 223.15f, 224.65f, 226.15f, 227.65f, 
229.15f, 230.65f, 232.15f, 233.65f, 235.15f, 236.65f, 238.15f, 239.65f, 241.15f, 242.65f, 244.15f, 245.65f}; - - const uint8_t mNPads[GPUCA_ROW_COUNT] = {66, 66, 66, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, - 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 92, 92, 92, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, - 82, 84, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 102, 102, 102, 104, 104, 104, 106, 110, - 110, 112, 112, 112, 114, 114, 114, 116, 116, 116, 118, 118, 118, 118, 118, 120, 120, 122, 122, 124, 124, 124, 126, 126, 128, 128, 128, 130, 130, 132, 132, 132, 134, 134, 136, 136, 138, 138}; - - const uint8_t mRegion[GPUCA_ROW_COUNT] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}; - const uint8_t mRegionRows[10] = {17, 15, 16, 15, 18, 16, 16, 14, 13, 12}; - const uint8_t mRegionStart[10] = {0, 17, 32, 48, 63, 81, 97, 113, 127, 140}; - - const uint8_t mSampaMapping[10] = {0, 0, 1, 1, 2, 3, 3, 4, 4, 2}; - const uint8_t mChannelOffset[10] = {0, 16, 0, 16, 0, 0, 16, 0, 16, 16}; - const uint8_t mSectorFECOffset[5] = {0, 15, 15 + 18, 15 + 18 + 18, 15 + 18 + 18 + 20}; - - const float mPadHeight[10] = {.75f, .75f, .75f, .75f, 1.f, 1.f, 1.2f, 1.2f, 1.5f, 1.5f}; - const float mPadWidth[10] = {.416f, .420f, .420f, .436f, .6f, .6f, .608f, .588f, .604f, .607f}; - - static constexpr float FACTOR_T2Z = 250.f / 512.f; // Used in compression, must remain 
constant at 250cm, 512 time bins! - - public: - GPUd() int32_t GetRegion(int32_t row) const { return mRegion[row]; } - GPUd() int32_t GetRegionRows(int32_t region) const { return mRegionRows[region]; } - GPUd() int32_t GetRegionStart(int32_t region) const { return mRegionStart[region]; } - GPUd() int32_t GetSampaMapping(int32_t region) const { return mSampaMapping[region]; } - GPUd() int32_t GetChannelOffset(int32_t region) const { return mChannelOffset[region]; } - GPUd() int32_t GetSectorFECOffset(int32_t partition) const { return mSectorFECOffset[partition]; } - GPUd() int32_t GetROC(int32_t row) const { return row < 97 ? (row < 63 ? 0 : 1) : (row < 127 ? 2 : 3); } - GPUd() int32_t EndIROC() const { return 63; } - GPUd() int32_t EndOROC1() const { return 97; } - GPUd() int32_t EndOROC2() const { return 127; } +GPUconstexpr() float mX[GPUCA_ROW_COUNT] = {85.225f, 85.975f, 86.725f, 87.475f, 88.225f, 88.975f, 89.725f, 90.475f, 91.225f, 91.975f, 92.725f, 93.475f, 94.225f, 94.975f, 95.725f, 96.475f, 97.225f, 97.975f, 98.725f, 99.475f, 100.225f, 100.975f, + 101.725f, 102.475f, 103.225f, 103.975f, 104.725f, 105.475f, 106.225f, 106.975f, 107.725f, 108.475f, 109.225f, 109.975f, 110.725f, 111.475f, 112.225f, 112.975f, 113.725f, 114.475f, 115.225f, 115.975f, 116.725f, 117.475f, + 118.225f, 118.975f, 119.725f, 120.475f, 121.225f, 121.975f, 122.725f, 123.475f, 124.225f, 124.975f, 125.725f, 126.475f, 127.225f, 127.975f, 128.725f, 129.475f, 130.225f, 130.975f, 131.725f, 135.2f, 136.2f, 137.2f, + 138.2f, 139.2f, 140.2f, 141.2f, 142.2f, 143.2f, 144.2f, 145.2f, 146.2f, 147.2f, 148.2f, 149.2f, 150.2f, 151.2f, 152.2f, 153.2f, 154.2f, 155.2f, 156.2f, 157.2f, 158.2f, 159.2f, + 160.2f, 161.2f, 162.2f, 163.2f, 164.2f, 165.2f, 166.2f, 167.2f, 168.2f, 171.4f, 172.6f, 173.8f, 175.f, 176.2f, 177.4f, 178.6f, 179.8f, 181.f, 182.2f, 183.4f, 184.6f, 185.8f, + 187.f, 188.2f, 189.4f, 190.6f, 191.8f, 193.f, 194.2f, 195.4f, 196.6f, 197.8f, 199.f, 200.2f, 201.4f, 202.6f, 203.8f, 205.f, 206.2f, 
209.65f, 211.15f, 212.65f, 214.15f, 215.65f, + 217.15f, 218.65f, 220.15f, 221.65f, 223.15f, 224.65f, 226.15f, 227.65f, 229.15f, 230.65f, 232.15f, 233.65f, 235.15f, 236.65f, 238.15f, 239.65f, 241.15f, 242.65f, 244.15f, 245.65f}; + +GPUconstexpr() uint8_t mNPads[GPUCA_ROW_COUNT] = {66, 66, 66, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, + 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 92, 92, 92, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, + 82, 84, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 102, 102, 102, 104, 104, 104, 106, 110, + 110, 112, 112, 112, 114, 114, 114, 116, 116, 116, 118, 118, 118, 118, 118, 120, 120, 122, 122, 124, 124, 124, 126, 126, 128, 128, 128, 130, 130, 132, 132, 132, 134, 134, 136, 136, 138, 138}; + +GPUconstexpr() uint8_t mRegion[GPUCA_ROW_COUNT] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}; +GPUconstexpr() uint8_t mRegionRows[10] = {17, 15, 16, 15, 18, 16, 16, 14, 13, 12}; +GPUconstexpr() uint8_t mRegionStart[10] = {0, 17, 32, 48, 63, 81, 97, 113, 127, 140}; + +GPUconstexpr() uint8_t mSampaMapping[10] = {0, 0, 1, 1, 2, 3, 3, 4, 4, 2}; +GPUconstexpr() uint8_t mChannelOffset[10] = {0, 16, 0, 16, 0, 0, 16, 0, 16, 16}; +GPUconstexpr() uint8_t mSectorFECOffset[5] = {0, 15, 15 + 18, 15 + 18 + 18, 15 + 18 + 18 + 20}; + +GPUconstexpr() float mPadHeight[10] = {.75f, .75f, .75f, .75f, 1.f, 1.f, 1.2f, 1.2f, 1.5f, 1.5f}; 
+GPUconstexpr() float mPadWidth[10] = {.416f, .420f, .420f, .436f, .6f, .6f, .608f, .588f, .604f, .607f}; + +constexpr float FACTOR_T2Z = 250.f / 512.f; // Used in compression, must remain constant at 250cm, 512 time bins! #else - const float mX[GPUCA_ROW_COUNT] = {85.195f, 85.945f, 86.695f, 87.445f, 88.195f, 88.945f, 89.695f, 90.445f, 91.195f, 91.945f, 92.695f, 93.445f, 94.195f, 94.945f, 95.695f, 96.445f, 97.195f, 97.945f, 98.695f, 99.445f, 100.195f, 100.945f, 101.695f, - 102.445f, 103.195f, 103.945f, 104.695f, 105.445f, 106.195f, 106.945f, 107.695f, 108.445f, 109.195f, 109.945f, 110.695f, 111.445f, 112.195f, 112.945f, 113.695f, 114.445f, 115.195f, 115.945f, 116.695f, 117.445f, 118.195f, 118.945f, - 119.695f, 120.445f, 121.195f, 121.945f, 122.695f, 123.445f, 124.195f, 124.945f, 125.695f, 126.445f, 127.195f, 127.945f, 128.695f, 129.445f, 130.195f, 130.945f, 131.695f, 135.180f, 136.180f, 137.180f, 138.180f, 139.180f, 140.180f, - 141.180f, 142.180f, 143.180f, 144.180f, 145.180f, 146.180f, 147.180f, 148.180f, 149.180f, 150.180f, 151.180f, 152.180f, 153.180f, 154.180f, 155.180f, 156.180f, 157.180f, 158.180f, 159.180f, 160.180f, 161.180f, 162.180f, 163.180f, - 164.180f, 165.180f, 166.180f, 167.180f, 168.180f, 169.180f, 170.180f, 171.180f, 172.180f, 173.180f, 174.180f, 175.180f, 176.180f, 177.180f, 178.180f, 179.180f, 180.180f, 181.180f, 182.180f, 183.180f, 184.180f, 185.180f, 186.180f, - 187.180f, 188.180f, 189.180f, 190.180f, 191.180f, 192.180f, 193.180f, 194.180f, 195.180f, 196.180f, 197.180f, 198.180f, 199.430f, 200.930f, 202.430f, 203.930f, 205.430f, 206.930f, 208.430f, 209.930f, 211.430f, 212.930f, 214.430f, - 215.930f, 217.430f, 218.930f, 220.430f, 221.930f, 223.430f, 224.930f, 226.430f, 227.930f, 229.430f, 230.930f, 232.430f, 233.930f, 235.430f, 236.930f, 238.430f, 239.930f, 241.430f, 242.930f, 244.430f, 245.930f}; - - const uint8_t mNPads[GPUCA_ROW_COUNT] = {68, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 
84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 92, 92, 92, - 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 108, 108, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, 82, 84, 84, - 84, 86, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 96, 96, 96, 96, 98, 98, 98, 100, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 106, 108, - 108, 108, 110, 110, 110, 110, 112, 112, 114, 114, 114, 116, 116, 118, 118, 120, 120, 122, 122, 122, 124, 124, 126, 126, 128, 128, 130, 130, 130, 132, 132, 134, 134, 136, 136, 138, 138, 138, 140}; - - const float mPadHeight[3] = {.75f, 1.f, 1.5f}; - const float mPadWidth[3] = {.4f, .6f, .6f}; +GPUconstexpr() float mX[GPUCA_ROW_COUNT] = {85.195f, 85.945f, 86.695f, 87.445f, 88.195f, 88.945f, 89.695f, 90.445f, 91.195f, 91.945f, 92.695f, 93.445f, 94.195f, 94.945f, 95.695f, 96.445f, 97.195f, 97.945f, 98.695f, 99.445f, 100.195f, 100.945f, 101.695f, + 102.445f, 103.195f, 103.945f, 104.695f, 105.445f, 106.195f, 106.945f, 107.695f, 108.445f, 109.195f, 109.945f, 110.695f, 111.445f, 112.195f, 112.945f, 113.695f, 114.445f, 115.195f, 115.945f, 116.695f, 117.445f, 118.195f, 118.945f, + 119.695f, 120.445f, 121.195f, 121.945f, 122.695f, 123.445f, 124.195f, 124.945f, 125.695f, 126.445f, 127.195f, 127.945f, 128.695f, 129.445f, 130.195f, 130.945f, 131.695f, 135.180f, 136.180f, 137.180f, 138.180f, 139.180f, 140.180f, + 141.180f, 142.180f, 143.180f, 144.180f, 145.180f, 146.180f, 147.180f, 148.180f, 149.180f, 150.180f, 151.180f, 152.180f, 153.180f, 154.180f, 155.180f, 156.180f, 157.180f, 158.180f, 159.180f, 160.180f, 161.180f, 162.180f, 163.180f, + 164.180f, 165.180f, 166.180f, 167.180f, 168.180f, 169.180f, 170.180f, 171.180f, 172.180f, 173.180f, 174.180f, 175.180f, 176.180f, 177.180f, 178.180f, 179.180f, 180.180f, 181.180f, 182.180f, 183.180f, 184.180f, 185.180f, 186.180f, + 187.180f, 188.180f, 189.180f, 190.180f, 191.180f, 192.180f, 193.180f, 194.180f, 195.180f, 196.180f, 197.180f, 
198.180f, 199.430f, 200.930f, 202.430f, 203.930f, 205.430f, 206.930f, 208.430f, 209.930f, 211.430f, 212.930f, 214.430f, + 215.930f, 217.430f, 218.930f, 220.430f, 221.930f, 223.430f, 224.930f, 226.430f, 227.930f, 229.430f, 230.930f, 232.430f, 233.930f, 235.430f, 236.930f, 238.430f, 239.930f, 241.430f, 242.930f, 244.430f, 245.930f}; + +GPUconstexpr() uint8_t mNPads[GPUCA_ROW_COUNT] = {68, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 92, 92, 92, + 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 108, 108, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, 82, 84, 84, + 84, 86, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 96, 96, 96, 96, 98, 98, 98, 100, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 106, 108, + 108, 108, 110, 110, 110, 110, 112, 112, 114, 114, 114, 116, 116, 118, 118, 120, 120, 122, 122, 122, 124, 124, 126, 126, 128, 128, 130, 130, 130, 132, 132, 134, 134, 136, 136, 138, 138, 138, 140}; + +GPUconstexpr() float mPadHeight[3] = {.75f, 1.f, 1.5f}; +GPUconstexpr() float mPadWidth[3] = {.4f, .6f, .6f}; + +constexpr float FACTOR_T2Z = 250.f / 1024.f; // Used in compression, must remain constant at 250cm, 1024 time bins! +#endif +} // namespace gputpcgeometry_internal - static constexpr float FACTOR_T2Z = 250.f / 1024.f; // Used in compression, must remain constant at 250cm, 1024 time bins! +class GPUTPCGeometry // TODO: Make values constexpr +{ + static constexpr float FACTOR_Z2T = 1.f / gputpcgeometry_internal::FACTOR_T2Z; public: - GPUd() int32_t GetRegion(int32_t row) const { return (row < 63 ? 0 : row < 63 + 64 ? 
1 : 2); } - GPUd() int32_t GetRegionRows(int32_t region) const { return 0; } // dummy - GPUd() int32_t GetRegionStart(int32_t region) const { return 0; } // dummy - GPUd() int32_t GetROC(int32_t row) const { return GetRegion(row); } - GPUd() int32_t EndIROC() const { return 63; } - GPUd() int32_t EndOROC1() const { return 63 + 64; } - GPUd() int32_t EndOROC2() const { return GPUCA_ROW_COUNT; } +#ifdef GPUCA_TPC_GEOMETRY_O2 + GPUd() static constexpr int32_t GetRegion(int32_t row) { return gputpcgeometry_internal::mRegion[row]; } + GPUd() static constexpr int32_t GetRegionRows(int32_t region) { return gputpcgeometry_internal::mRegionRows[region]; } + GPUd() static constexpr int32_t GetRegionStart(int32_t region) { return gputpcgeometry_internal::mRegionStart[region]; } + GPUd() static constexpr int32_t GetSampaMapping(int32_t region) { return gputpcgeometry_internal::mSampaMapping[region]; } + GPUd() static constexpr int32_t GetChannelOffset(int32_t region) { return gputpcgeometry_internal::mChannelOffset[region]; } + GPUd() static constexpr int32_t GetSectorFECOffset(int32_t partition) { return gputpcgeometry_internal::mSectorFECOffset[partition]; } + GPUd() static constexpr int32_t GetROC(int32_t row) { return row < 97 ? (row < 63 ? 0 : 1) : (row < 127 ? 2 : 3); } + GPUd() static constexpr int32_t EndIROC() { return 63; } + GPUd() static constexpr int32_t EndOROC1() { return 97; } + GPUd() static constexpr int32_t EndOROC2() { return 127; } +#else + GPUd() static constexpr int32_t GetRegion(int32_t row) { return (row < 63 ? 0 : row < 63 + 64 ? 
1 : 2); } + GPUd() static constexpr int32_t GetRegionRows(int32_t region) { return 0; } // dummy + GPUd() static constexpr int32_t GetRegionStart(int32_t region) { return 0; } // dummy + GPUd() static constexpr int32_t GetROC(int32_t row) { return GetRegion(row); } + GPUd() static constexpr int32_t EndIROC() { return 63; } + GPUd() static constexpr int32_t EndOROC1() { return 63 + 64; } + GPUd() static constexpr int32_t EndOROC2() { return GPUCA_ROW_COUNT; } #endif - private: - static constexpr float FACTOR_Z2T = 1.f / FACTOR_T2Z; - public: GPUd() static constexpr float TPCLength() { return 250.f - 0.275f; } - GPUd() float Row2X(int32_t row) const { return (mX[row]); } - GPUd() float PadHeight(int32_t row) const { return (mPadHeight[GetRegion(row)]); } - GPUd() float PadHeightByRegion(int32_t region) const { return (mPadHeight[region]); } - GPUd() float PadWidth(int32_t row) const { return (mPadWidth[GetRegion(row)]); } - GPUd() uint8_t NPads(int32_t row) const { return mNPads[row]; } + GPUd() static constexpr float Row2X(int32_t row) { return (gputpcgeometry_internal::mX[row]); } + GPUd() static constexpr float PadHeight(int32_t row) { return (gputpcgeometry_internal::mPadHeight[GetRegion(row)]); } + GPUd() static constexpr float PadHeightByRegion(int32_t region) { return (gputpcgeometry_internal::mPadHeight[region]); } + GPUd() static constexpr float PadWidth(int32_t row) { return (gputpcgeometry_internal::mPadWidth[GetRegion(row)]); } + GPUd() static constexpr uint8_t NPads(int32_t row) { return gputpcgeometry_internal::mNPads[row]; } - GPUd() float LinearPad2Y(int32_t sector, int32_t row, float pad) const + GPUd() static constexpr float LinearPad2Y(int32_t sector, int32_t row, float pad) { #ifdef GPUCA_TPC_GEOMETRY_O2 - const float u = (pad - 0.5f * (mNPads[row] - 1)) * PadWidth(row); + const float u = (pad - 0.5f * (gputpcgeometry_internal::mNPads[row] - 1)) * PadWidth(row); #else - const float u = (pad - 0.5f * mNPads[row]) * PadWidth(row); + const float u = 
(pad - 0.5f * gputpcgeometry_internal::mNPads[row]) * PadWidth(row); #endif return (sector >= GPUCA_NSECTORS / 2) ? -u : u; } - GPUd() static float LinearTime2Z(int32_t sector, float time) + GPUd() static constexpr float LinearTime2Z(int32_t sector, float time) { - const float v = 250.f - time * FACTOR_T2Z; // Used in compression, must remain constant at 250cm! + const float v = 250.f - time * gputpcgeometry_internal::FACTOR_T2Z; // Used in compression, must remain constant at 250cm! return (sector >= GPUCA_NSECTORS / 2) ? -v : v; } - GPUd() float LinearY2Pad(int32_t sector, int32_t row, float y) const + GPUd() static constexpr float LinearY2Pad(int32_t sector, int32_t row, float y) { const float u = (sector >= GPUCA_NSECTORS / 2) ? -y : y; #ifdef GPUCA_TPC_GEOMETRY_O2 - return u / PadWidth(row) + 0.5f * (mNPads[row] - 1); + return u / PadWidth(row) + 0.5f * (gputpcgeometry_internal::mNPads[row] - 1); #else - return u / PadWidth(row) + 0.5f * mNPads[row]; + return u / PadWidth(row) + 0.5f * gputpcgeometry_internal::mNPads[row]; #endif } - GPUd() static float LinearZ2Time(int32_t sector, float z) + GPUd() static constexpr float LinearZ2Time(int32_t sector, float z) { const float v = (sector >= GPUCA_NSECTORS / 2) ? 
-z : z; return (250.f - v) * FACTOR_Z2T; // Used in compression, must remain constant at 250cm } }; + } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index 73f7cb439775a..b8c2bd81d8b37 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -31,8 +31,8 @@ namespace gpu { struct ChargePos; -class GPUTPCGeometry; struct GPUParam; +class GPUTPCGeometry; class ClusterAccumulator { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index f59102aa6b5c3..3e9ea2c6f608b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -48,8 +48,6 @@ struct TPCPadGainCalib; struct ChargePos; -class GPUTPCGeometry; - class GPUTPCClusterFinder : public GPUProcessor { public: From 5e8b7b5b23362a0036df67ae6b8e52296de5573c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 23:50:11 +0100 Subject: [PATCH 0056/1764] GPU Math: Revert some changes which are UB --- GPU/Common/GPUCommonMath.h | 18 ++++++++---------- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index e977b3679a4ee..c412662fc0c64 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -94,14 +94,12 @@ class GPUCommonMath GPUd() constexpr static float Pi() { return 3.1415927f; } GPUd() constexpr static float Round(float x); GPUd() constexpr static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); } - GPUd() static uint32_t Float2UIntReint(float x); + GPUd() static uint32_t Float2UIntReint(const float& x); GPUd() constexpr static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } GPUd() constexpr static 
int32_t Float2IntRn(float x); GPUd() constexpr static float Modf(float x, float y); - GPUd() constexpr static bool Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } - GPUd() constexpr static bool IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } - GPUd() constexpr static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH - GPUd() constexpr static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() constexpr static bool Finite(float x); + GPUd() constexpr static bool IsNaN(float x); GPUd() constexpr static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } GPUd() constexpr static uint32_t Clz(uint32_t val); GPUd() constexpr static uint32_t Popcount(uint32_t val); @@ -241,7 +239,7 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y) GPUdi() constexpr float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } -GPUdi() uint32_t GPUCommonMath::Float2UIntReint(float x) +GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) { #if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) return __float_as_uint(x); @@ -266,8 +264,8 @@ GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((floa GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); } GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } -GPUdi() constexpr bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } -GPUdi() constexpr bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } +GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return 
GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } +GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } #else GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } @@ -282,8 +280,8 @@ GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); } GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); } GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); } -GPUdi() constexpr bool GPUCommonMath::FiniteRelaxed(float x) { return true; } -GPUdi() constexpr bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } +GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return true; } +GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; } #endif GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 1c61316ed454e..b9620b9385c73 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -51,7 +51,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(GPUCA_BUILD_DEBUG) set(CMAKE_CXX_FLAGS "-O0 -ggdb") if (GPUCA_BUILD_DEBUG_SANITIZE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang endif() set(CMAKE_BUILD_TYPE DEBUG) else() From c3d005fe4386ec4b9da41bd63c75d1abb3b5353c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 09:35:43 
+0100 Subject: [PATCH 0057/1764] GPU TPC CF: Split clusterizer CXX functions out into .inc file to be used externally --- .../TPCClusterFinder/GPUTPCCFClusterizer.cxx | 231 +--------------- .../TPCClusterFinder/GPUTPCCFClusterizer.h | 8 +- .../TPCClusterFinder/GPUTPCCFClusterizer.inc | 247 ++++++++++++++++++ 3 files changed, 253 insertions(+), 233 deletions(-) create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index 407deb6a588d0..2131347decec6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -25,6 +25,8 @@ using namespace o2::gpu; using namespace o2::gpu::tpccf; +#include "GPUTPCCFClusterizer.inc" + template <> GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t onlyMC) { @@ -35,232 +37,3 @@ GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow); } - -GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, - processorType& clusterer, - const CfFragment& fragment, - GPUSharedMemory& smem, - const Array2D& chargeMap, - const ChargePos* filteredPeakPositions, - const GPUSettingsRec& calib, - MCLabelAccumulator* labelAcc, - uint32_t clusternum, - uint32_t maxClusterPerRow, - uint32_t* clusterInRow, - tpc::ClusterNative* clusterByRow, - uint32_t* 
clusterPosInRow) -{ - uint32_t idx = get_global_id(0); - - // For certain configurations dummy work items are added, so the total - // number of work items is dividable by 64. - // These dummy items also compute the last cluster but discard the result. - ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; - Charge charge = chargeMap[pos].unpack(); - - ClusterAccumulator pc; - CPU_ONLY(labelAcc->collect(pos, charge)); - - buildCluster( - calib, - chargeMap, - pos, - smem.posBcast, - smem.buf, - smem.innerAboveThreshold, - &pc, - labelAcc); - - if (idx >= clusternum) { - return; - } - if (fragment.isOverlap(pos.time())) { - if (clusterPosInRow) { - clusterPosInRow[idx] = maxClusterPerRow; - } - return; - } - tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap); - - if (rejectCluster) { - if (clusterPosInRow) { - clusterPosInRow[idx] = maxClusterPerRow; - } - return; - } - - uint32_t rowIndex = 0; - if (clusterByRow != nullptr) { - rowIndex = sortIntoBuckets( - clusterer, - myCluster, - pos.row(), - maxClusterPerRow, - clusterInRow, - clusterByRow); - if (clusterPosInRow != nullptr) { - clusterPosInRow[idx] = rowIndex; - } - } else if (clusterPosInRow) { - rowIndex = clusterPosInRow[idx]; - } - - CPU_ONLY(labelAcc->commit(pos.row(), rowIndex, maxClusterPerRow)); -} - -GPUdii() void GPUTPCCFClusterizer::updateClusterInner( - const GPUSettingsRec& calib, - uint16_t lid, - uint16_t N, - const PackedCharge* buf, - const ChargePos& pos, - ClusterAccumulator* cluster, - MCLabelAccumulator* labelAcc, - uint8_t* innerAboveThreshold) -{ - uint8_t aboveThreshold = 0; - - GPUCA_UNROLL(U(), U()) - for (uint16_t i = 0; i < N; i++) { - Delta2 d = cfconsts::InnerNeighbors[i]; - - PackedCharge p = buf[N * lid + i]; - - Charge q = cluster->updateInner(p, d); - - CPU_ONLY(labelAcc->collect(pos.delta(d), q)); - - aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i); - } - 
- innerAboveThreshold[lid] = aboveThreshold; - - GPUbarrier(); -} - -GPUdii() void GPUTPCCFClusterizer::updateClusterOuter( - uint16_t lid, - uint16_t N, - uint16_t M, - uint16_t offset, - const PackedCharge* buf, - const ChargePos& pos, - ClusterAccumulator* cluster, - MCLabelAccumulator* labelAcc) -{ - GPUCA_UNROLL(U(), U()) - for (uint16_t i = offset; i < M + offset; i++) { - PackedCharge p = buf[N * lid + i]; - - Delta2 d = cfconsts::OuterNeighbors[i]; - - Charge q = cluster->updateOuter(p, d); - static_cast(q); // Avoid unused varible warning on GPU. - - CPU_ONLY(labelAcc->collect(pos.delta(d), q)); - } -} - -GPUdii() void GPUTPCCFClusterizer::buildCluster( - const GPUSettingsRec& calib, - const Array2D& chargeMap, - ChargePos pos, - ChargePos* posBcast, - PackedCharge* buf, - uint8_t* innerAboveThreshold, - ClusterAccumulator* myCluster, - MCLabelAccumulator* labelAcc) -{ - uint16_t ll = get_local_id(0); - - posBcast[ll] = pos; - GPUbarrier(); - - CfUtils::blockLoad( - chargeMap, - SCRATCH_PAD_WORK_GROUP_SIZE, - SCRATCH_PAD_WORK_GROUP_SIZE, - ll, - 0, - 8, - cfconsts::InnerNeighbors, - posBcast, - buf); - updateClusterInner( - calib, - ll, - 8, - buf, - pos, - myCluster, - labelAcc, - innerAboveThreshold); - - uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2; - - bool inGroup1 = ll < wgSizeHalf; - - uint16_t llhalf = (inGroup1) ? 
ll : (ll - wgSizeHalf); - - CfUtils::condBlockLoad( - chargeMap, - wgSizeHalf, - SCRATCH_PAD_WORK_GROUP_SIZE, - ll, - 0, - 16, - cfconsts::OuterNeighbors, - posBcast, - innerAboveThreshold, - buf); - - if (inGroup1) { - updateClusterOuter( - llhalf, - 16, - 16, - 0, - buf, - pos, - myCluster, - labelAcc); - } - -#if defined(GPUCA_GPUCODE) - CfUtils::condBlockLoad( - chargeMap, - wgSizeHalf, - SCRATCH_PAD_WORK_GROUP_SIZE, - ll, - 0, - 16, - cfconsts::OuterNeighbors, - posBcast + wgSizeHalf, - innerAboveThreshold + wgSizeHalf, - buf); - if (!inGroup1) { - updateClusterOuter( - llhalf, - 16, - 16, - 0, - buf, - pos, - myCluster, - labelAcc); - } -#endif -} - -GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets) -{ - uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u); - if (index < maxElemsPerBucket) { - buckets[maxElemsPerBucket * row + index] = cluster; - } else { - clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket); - CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket); - } - return index; -} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h index 411c38c39459e..79f3325ed9ad2 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h @@ -59,14 +59,14 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const Array2D&, const ChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*); + static GPUd() void buildCluster(const GPUSettingsRec&, const Array2D&, ChargePos, ChargePos*, PackedCharge*, 
uint8_t*, ClusterAccumulator*, MCLabelAccumulator*); + + static GPUd() uint32_t sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*); + private: static GPUd() void updateClusterInner(const GPUSettingsRec&, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*, uint8_t*); static GPUd() void updateClusterOuter(uint16_t, uint16_t, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*); - - static GPUd() void buildCluster(const GPUSettingsRec&, const Array2D&, ChargePos, ChargePos*, PackedCharge*, uint8_t*, ClusterAccumulator*, MCLabelAccumulator*); - - static GPUd() uint32_t sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc new file mode 100644 index 0000000000000..c6d7a3b68c0a8 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc @@ -0,0 +1,247 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCCFClusterizer.cxx +/// \author Felix Weiglhofer + +#ifndef O2_GPU_CLUSTERIZER_INC_H +#define O2_GPU_CLUSTERIZER_INC_H + +GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, + processorType& clusterer, + const CfFragment& fragment, + GPUSharedMemory& smem, + const Array2D& chargeMap, + const ChargePos* filteredPeakPositions, + const GPUSettingsRec& calib, + MCLabelAccumulator* labelAcc, + uint32_t clusternum, + uint32_t maxClusterPerRow, + uint32_t* clusterInRow, + tpc::ClusterNative* clusterByRow, + uint32_t* clusterPosInRow) +{ + uint32_t idx = get_global_id(0); + + // For certain configurations dummy work items are added, so the total + // number of work items is dividable by 64. + // These dummy items also compute the last cluster but discard the result. + ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; + Charge charge = chargeMap[pos].unpack(); + + ClusterAccumulator pc; + CPU_ONLY(labelAcc->collect(pos, charge)); + + buildCluster( + calib, + chargeMap, + pos, + smem.posBcast, + smem.buf, + smem.innerAboveThreshold, + &pc, + labelAcc); + + if (idx >= clusternum) { + return; + } + if (fragment.isOverlap(pos.time())) { + if (clusterPosInRow) { + clusterPosInRow[idx] = maxClusterPerRow; + } + return; + } + tpc::ClusterNative myCluster; + bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap); + + if (rejectCluster) { + if (clusterPosInRow) { + clusterPosInRow[idx] = maxClusterPerRow; + } + return; + } + + uint32_t rowIndex = 0; + if (clusterByRow != nullptr) { + rowIndex = sortIntoBuckets( + clusterer, + myCluster, + pos.row(), + maxClusterPerRow, + clusterInRow, + clusterByRow); + if (clusterPosInRow != nullptr) { + clusterPosInRow[idx] = rowIndex; + } + } else if (clusterPosInRow) { + rowIndex = clusterPosInRow[idx]; + } + + CPU_ONLY(labelAcc->commit(pos.row(), rowIndex, maxClusterPerRow)); +} + 
+GPUdii() void GPUTPCCFClusterizer::updateClusterInner( + const GPUSettingsRec& calib, + uint16_t lid, + uint16_t N, + const PackedCharge* buf, + const ChargePos& pos, + ClusterAccumulator* cluster, + MCLabelAccumulator* labelAcc, + uint8_t* innerAboveThreshold) +{ + uint8_t aboveThreshold = 0; + + GPUCA_UNROLL(U(), U()) + for (uint16_t i = 0; i < N; i++) { + Delta2 d = cfconsts::InnerNeighbors[i]; + + PackedCharge p = buf[N * lid + i]; + + Charge q = cluster->updateInner(p, d); + + CPU_ONLY(labelAcc->collect(pos.delta(d), q)); + + aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i); + } + + innerAboveThreshold[lid] = aboveThreshold; + + GPUbarrier(); +} + +GPUdii() void GPUTPCCFClusterizer::updateClusterOuter( + uint16_t lid, + uint16_t N, + uint16_t M, + uint16_t offset, + const PackedCharge* buf, + const ChargePos& pos, + ClusterAccumulator* cluster, + MCLabelAccumulator* labelAcc) +{ + GPUCA_UNROLL(U(), U()) + for (uint16_t i = offset; i < M + offset; i++) { + PackedCharge p = buf[N * lid + i]; + + Delta2 d = cfconsts::OuterNeighbors[i]; + + Charge q = cluster->updateOuter(p, d); + static_cast(q); // Avoid unused varible warning on GPU. + + CPU_ONLY(labelAcc->collect(pos.delta(d), q)); + } +} + +GPUdii() void GPUTPCCFClusterizer::buildCluster( + const GPUSettingsRec& calib, + const Array2D& chargeMap, + ChargePos pos, + ChargePos* posBcast, + PackedCharge* buf, + uint8_t* innerAboveThreshold, + ClusterAccumulator* myCluster, + MCLabelAccumulator* labelAcc) +{ + uint16_t ll = get_local_id(0); + + posBcast[ll] = pos; + GPUbarrier(); + + CfUtils::blockLoad( + chargeMap, + SCRATCH_PAD_WORK_GROUP_SIZE, + SCRATCH_PAD_WORK_GROUP_SIZE, + ll, + 0, + 8, + cfconsts::InnerNeighbors, + posBcast, + buf); + updateClusterInner( + calib, + ll, + 8, + buf, + pos, + myCluster, + labelAcc, + innerAboveThreshold); + + uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2; + + bool inGroup1 = ll < wgSizeHalf; + + uint16_t llhalf = (inGroup1) ? 
ll : (ll - wgSizeHalf); + + CfUtils::condBlockLoad( + chargeMap, + wgSizeHalf, + SCRATCH_PAD_WORK_GROUP_SIZE, + ll, + 0, + 16, + cfconsts::OuterNeighbors, + posBcast, + innerAboveThreshold, + buf); + + if (inGroup1) { + updateClusterOuter( + llhalf, + 16, + 16, + 0, + buf, + pos, + myCluster, + labelAcc); + } + +#if defined(GPUCA_GPUCODE) + CfUtils::condBlockLoad( + chargeMap, + wgSizeHalf, + SCRATCH_PAD_WORK_GROUP_SIZE, + ll, + 0, + 16, + cfconsts::OuterNeighbors, + posBcast + wgSizeHalf, + innerAboveThreshold + wgSizeHalf, + buf); + if (!inGroup1) { + updateClusterOuter( + llhalf, + 16, + 16, + 0, + buf, + pos, + myCluster, + labelAcc); + } +#endif +} + +GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets) +{ + uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u); + if (index < maxElemsPerBucket) { + buckets[maxElemsPerBucket * row + index] = cluster; + } else { + clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket); + CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket); + } + return index; +} + +#endif // O2_GPU_CLUSTERIZER_INC_H From db7b2f057d285cd65a434885fe9cbdbcadcfe18b Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Thu, 13 Mar 2025 17:54:22 +0100 Subject: [PATCH 0058/1764] GPU ITS: Fix broken initialisation + cleanup (#14058) --- .../tracking/include/ITStracking/TrackingConfigParam.h | 6 +++--- .../ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 10 +++++----- Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx | 1 - GPU/Workflow/src/GPUWorkflowITS.cxx | 1 - 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index 0cf44d08cac19..ec96321765534 100644 --- 
a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -56,13 +56,13 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper { // Use TGeo for mat. budget static const int MaxIter = 4; - static const int MinTrackLenght = 4; - static const int MaxTrackLenght = 7; + static const int MinTrackLength = 4; + static const int MaxTrackLength = 7; bool useMatCorrTGeo = false; // use full geometry to corect for material budget accounting in the fits. Default is to use the material budget LUT. bool useFastMaterial = false; // use faster material approximation for material budget accounting in the fits. int deltaRof = 0; // configure the width of the window in ROFs to be considered for the tracking. int minTrackLgtIter[MaxIter] = {}; // minimum track length at each iteration, used only if >0, otherwise use code defaults - float minPtIterLgt[MaxIter * (MaxTrackLenght - MinTrackLenght + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults + float minPtIterLgt[MaxIter * (MaxTrackLength - MinTrackLength + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults float sysErrY2[7] = {0}; // systematic error^2 in Y per layer float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer float maxChi2ClusterAttachment = -1.f; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index b264ac46bc7b3..2c94c9bdb1f46 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -81,8 +81,8 @@ void ITSTrackingInterface::initialise() if (trackConf.minTrackLgtIter[ip] > 0) { param.MinTrackLength = trackConf.minTrackLgtIter[ip]; } - for (int ilg = trackConf.MaxTrackLenght; ilg >= trackConf.MinTrackLenght; ilg--) { - int lslot0 = 
(trackConf.MaxTrackLenght - ilg), lslot = lslot0 + ip * (trackConf.MaxTrackLenght - trackConf.MinTrackLenght + 1); + for (int ilg = trackConf.MaxTrackLength; ilg >= trackConf.MinTrackLength; ilg--) { + int lslot0 = (trackConf.MaxTrackLength - ilg), lslot = lslot0 + ip * (trackConf.MaxTrackLength - trackConf.MinTrackLength + 1); if (trackConf.minPtIterLgt[lslot] > 0.) { param.MinPt[lslot0] = trackConf.minPtIterLgt[lslot]; } @@ -127,8 +127,8 @@ void ITSTrackingInterface::initialise() for (size_t ip = 0; ip < trackParams.size(); ip++) { auto& param = trackParams[ip]; param.TrackletMinPt *= bFactor; - for (int ilg = trackConf.MaxTrackLenght; ilg >= trackConf.MinTrackLenght; ilg--) { - int lslot = trackConf.MaxTrackLenght - ilg; + for (int ilg = trackConf.MaxTrackLength; ilg >= trackConf.MinTrackLength; ilg--) { + int lslot = trackConf.MaxTrackLength - ilg; param.MinPt[lslot] *= bFactor; } } @@ -384,7 +384,7 @@ void ITSTrackingInterface::updateTimeDependentParams(framework::ProcessingContex geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::T2GRot, o2::math_utils::TransformType::T2G)); initialise(); getConfiguration(pc); - // + if (pc.services().get().inputTimesliceId == 0) { // print settings only for the 1st pipeling o2::its::VertexerParamConfig::Instance().printKeyValues(); o2::its::TrackerParamConfig::Instance().printKeyValues(); diff --git a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx index dd4c40a2141d9..9e4c98ad6e9a1 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx @@ -44,7 +44,6 @@ void TrackerDPL::init(InitContext& ic) mITSTrackingInterface.setTraitsFromProvider(mChainITS->GetITSVertexerTraits(), mChainITS->GetITSTrackerTraits(), mChainITS->GetITSTimeframe()); - // mITSTrackingInterface.initialise() will be called from the ITSTrackingInterface::updateTimeDependentParams at 
1st initialization since it needs some run conditions } void TrackerDPL::stop() diff --git a/GPU/Workflow/src/GPUWorkflowITS.cxx b/GPU/Workflow/src/GPUWorkflowITS.cxx index 552c5fca5b83e..db9303c431ae7 100644 --- a/GPU/Workflow/src/GPUWorkflowITS.cxx +++ b/GPU/Workflow/src/GPUWorkflowITS.cxx @@ -40,7 +40,6 @@ void GPURecoWorkflowSpec::initFunctionITS(o2::framework::InitContext& ic) mSpecConfig.itsOverrBeamEst); mGPUReco->GetITSTraits(trkTraits, vtxTraits, mITSTimeFrame); mITSTrackingInterface->setTraitsFromProvider(vtxTraits, trkTraits, mITSTimeFrame); - mITSTrackingInterface->initialise(); } void GPURecoWorkflowSpec::finaliseCCDBITS(o2::framework::ConcreteDataMatcher& matcher, void* obj) From b27c2a3ff29645f75f52eab793a5fb3558f1f7a3 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Thu, 13 Mar 2025 09:48:32 +0100 Subject: [PATCH 0059/1764] Making float16 variables compatible with GPU types --- Common/ML/include/ML/3rdparty/GPUORTFloat16.h | 126 ++++++++++-------- 1 file changed, 72 insertions(+), 54 deletions(-) diff --git a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h index db65328409d3c..76fd6734cf9db 100644 --- a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h +++ b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h @@ -5,10 +5,18 @@ // - https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_float16.h // - https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_cxx_api.h +#ifndef GPUORTFLOAT16_H +#define GPUORTFLOAT16_H + +#ifndef GPUCA_GPUCODE_DEVICE #include #include #include #include +#endif + +#include "GPUCommonDef.h" +#include "GPUCommonMath.h" namespace o2 { @@ -50,19 +58,19 @@ struct Float16Impl { /// /// /// - constexpr static uint16_t ToUint16Impl(float v) noexcept; + GPUd() constexpr static uint16_t ToUint16Impl(float v) noexcept; /// /// Converts float16 to float /// /// float representation of float16 value - float 
ToFloatImpl() const noexcept; + GPUd() float ToFloatImpl() const noexcept; /// /// Creates an instance that represents absolute value. /// /// Absolute value - uint16_t AbsImpl() const noexcept + GPUd() uint16_t AbsImpl() const noexcept { return static_cast(val & ~kSignMask); } @@ -71,7 +79,7 @@ struct Float16Impl { /// Creates a new instance with the sign flipped. /// /// Flipped sign instance - uint16_t NegateImpl() const noexcept + GPUd() uint16_t NegateImpl() const noexcept { return IsNaN() ? val : static_cast(val ^ kSignMask); } @@ -92,13 +100,13 @@ struct Float16Impl { uint16_t val{0}; - Float16Impl() = default; + GPUdDefault() Float16Impl() = default; /// /// Checks if the value is negative /// /// true if negative - bool IsNegative() const noexcept + GPUd() bool IsNegative() const noexcept { return static_cast(val) < 0; } @@ -107,7 +115,7 @@ struct Float16Impl { /// Tests if the value is NaN /// /// true if NaN - bool IsNaN() const noexcept + GPUd() bool IsNaN() const noexcept { return AbsImpl() > kPositiveInfinityBits; } @@ -116,7 +124,7 @@ struct Float16Impl { /// Tests if the value is finite /// /// true if finite - bool IsFinite() const noexcept + GPUd() bool IsFinite() const noexcept { return AbsImpl() < kPositiveInfinityBits; } @@ -125,7 +133,7 @@ struct Float16Impl { /// Tests if the value represents positive infinity. /// /// true if positive infinity - bool IsPositiveInfinity() const noexcept + GPUd() bool IsPositiveInfinity() const noexcept { return val == kPositiveInfinityBits; } @@ -134,7 +142,7 @@ struct Float16Impl { /// Tests if the value represents negative infinity /// /// true if negative infinity - bool IsNegativeInfinity() const noexcept + GPUd() bool IsNegativeInfinity() const noexcept { return val == kNegativeInfinityBits; } @@ -143,7 +151,7 @@ struct Float16Impl { /// Tests if the value is either positive or negative infinity. 
/// /// True if absolute value is infinity - bool IsInfinity() const noexcept + GPUd() bool IsInfinity() const noexcept { return AbsImpl() == kPositiveInfinityBits; } @@ -152,7 +160,7 @@ struct Float16Impl { /// Tests if the value is NaN or zero. Useful for comparisons. /// /// True if NaN or zero. - bool IsNaNOrZero() const noexcept + GPUd() bool IsNaNOrZero() const noexcept { auto abs = AbsImpl(); return (abs == 0 || abs > kPositiveInfinityBits); @@ -162,7 +170,7 @@ struct Float16Impl { /// Tests if the value is normal (not zero, subnormal, infinite, or NaN). /// /// True if so - bool IsNormal() const noexcept + GPUd() bool IsNormal() const noexcept { auto abs = AbsImpl(); return (abs < kPositiveInfinityBits) // is finite @@ -174,7 +182,7 @@ struct Float16Impl { /// Tests if the value is subnormal (denormal). /// /// True if so - bool IsSubnormal() const noexcept + GPUd() bool IsSubnormal() const noexcept { auto abs = AbsImpl(); return (abs < kPositiveInfinityBits) // is finite @@ -186,13 +194,13 @@ struct Float16Impl { /// Creates an instance that represents absolute value. /// /// Absolute value - Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); } + GPUd() Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); } /// /// Creates a new instance with the sign flipped. 
/// /// Flipped sign instance - Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); } + GPUd() Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); } /// /// IEEE defines that positive and negative zero are equal, this gives us a quick equality check @@ -202,12 +210,12 @@ struct Float16Impl { /// first value /// second value /// True if both arguments represent zero - static bool AreZero(const Float16Impl& lhs, const Float16Impl& rhs) noexcept + GPUd() static bool AreZero(const Float16Impl& lhs, const Float16Impl& rhs) noexcept { return static_cast((lhs.val | rhs.val) & ~kSignMask) == 0; } - bool operator==(const Float16Impl& rhs) const noexcept + GPUd() bool operator==(const Float16Impl& rhs) const noexcept { if (IsNaN() || rhs.IsNaN()) { // IEEE defines that NaN is not equal to anything, including itself. @@ -216,9 +224,9 @@ struct Float16Impl { return val == rhs.val; } - bool operator!=(const Float16Impl& rhs) const noexcept { return !(*this == rhs); } + GPUd() bool operator!=(const Float16Impl& rhs) const noexcept { return !(*this == rhs); } - bool operator<(const Float16Impl& rhs) const noexcept + GPUd() bool operator<(const Float16Impl& rhs) const noexcept { if (IsNaN() || rhs.IsNaN()) { // IEEE defines that NaN is unordered with respect to everything, including itself. 
@@ -267,7 +275,7 @@ union float32_bits { }; // namespace detail template -inline constexpr uint16_t Float16Impl::ToUint16Impl(float v) noexcept +GPUdi() constexpr uint16_t Float16Impl::ToUint16Impl(float v) noexcept { detail::float32_bits f{}; f.f = v; @@ -316,7 +324,7 @@ inline constexpr uint16_t Float16Impl::ToUint16Impl(float v) noexcept } template -inline float Float16Impl::ToFloatImpl() const noexcept +GPUdi() float Float16Impl::ToFloatImpl() const noexcept { constexpr detail::float32_bits magic = {113 << 23}; constexpr unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift @@ -356,19 +364,19 @@ struct BFloat16Impl { /// /// /// - static uint16_t ToUint16Impl(float v) noexcept; + GPUd() static uint16_t ToUint16Impl(float v) noexcept; /// /// Converts bfloat16 to float /// /// float representation of bfloat16 value - float ToFloatImpl() const noexcept; + GPUd() float ToFloatImpl() const noexcept; /// /// Creates an instance that represents absolute value. /// /// Absolute value - uint16_t AbsImpl() const noexcept + GPUd() uint16_t AbsImpl() const noexcept { return static_cast(val & ~kSignMask); } @@ -377,7 +385,7 @@ struct BFloat16Impl { /// Creates a new instance with the sign flipped. /// /// Flipped sign instance - uint16_t NegateImpl() const noexcept + GPUd() uint16_t NegateImpl() const noexcept { return IsNaN() ? 
val : static_cast(val ^ kSignMask); } @@ -400,13 +408,13 @@ struct BFloat16Impl { uint16_t val{0}; - BFloat16Impl() = default; + GPUdDefault() BFloat16Impl() = default; /// /// Checks if the value is negative /// /// true if negative - bool IsNegative() const noexcept + GPUd() bool IsNegative() const noexcept { return static_cast(val) < 0; } @@ -415,7 +423,7 @@ struct BFloat16Impl { /// Tests if the value is NaN /// /// true if NaN - bool IsNaN() const noexcept + GPUd() bool IsNaN() const noexcept { return AbsImpl() > kPositiveInfinityBits; } @@ -424,7 +432,7 @@ struct BFloat16Impl { /// Tests if the value is finite /// /// true if finite - bool IsFinite() const noexcept + GPUd() bool IsFinite() const noexcept { return AbsImpl() < kPositiveInfinityBits; } @@ -433,7 +441,7 @@ struct BFloat16Impl { /// Tests if the value represents positive infinity. /// /// true if positive infinity - bool IsPositiveInfinity() const noexcept + GPUd() bool IsPositiveInfinity() const noexcept { return val == kPositiveInfinityBits; } @@ -442,7 +450,7 @@ struct BFloat16Impl { /// Tests if the value represents negative infinity /// /// true if negative infinity - bool IsNegativeInfinity() const noexcept + GPUd() bool IsNegativeInfinity() const noexcept { return val == kNegativeInfinityBits; } @@ -451,7 +459,7 @@ struct BFloat16Impl { /// Tests if the value is either positive or negative infinity. /// /// True if absolute value is infinity - bool IsInfinity() const noexcept + GPUd() bool IsInfinity() const noexcept { return AbsImpl() == kPositiveInfinityBits; } @@ -460,7 +468,7 @@ struct BFloat16Impl { /// Tests if the value is NaN or zero. Useful for comparisons. /// /// True if NaN or zero. - bool IsNaNOrZero() const noexcept + GPUd() bool IsNaNOrZero() const noexcept { auto abs = AbsImpl(); return (abs == 0 || abs > kPositiveInfinityBits); @@ -470,7 +478,7 @@ struct BFloat16Impl { /// Tests if the value is normal (not zero, subnormal, infinite, or NaN). 
/// /// True if so - bool IsNormal() const noexcept + GPUd() bool IsNormal() const noexcept { auto abs = AbsImpl(); return (abs < kPositiveInfinityBits) // is finite @@ -482,7 +490,7 @@ struct BFloat16Impl { /// Tests if the value is subnormal (denormal). /// /// True if so - bool IsSubnormal() const noexcept + GPUd() bool IsSubnormal() const noexcept { auto abs = AbsImpl(); return (abs < kPositiveInfinityBits) // is finite @@ -494,13 +502,13 @@ struct BFloat16Impl { /// Creates an instance that represents absolute value. /// /// Absolute value - Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); } + GPUd() Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); } /// /// Creates a new instance with the sign flipped. /// /// Flipped sign instance - Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); } + GPUd() Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); } /// /// IEEE defines that positive and negative zero are equal, this gives us a quick equality check @@ -510,7 +518,7 @@ struct BFloat16Impl { /// first value /// second value /// True if both arguments represent zero - static bool AreZero(const BFloat16Impl& lhs, const BFloat16Impl& rhs) noexcept + GPUd() static bool AreZero(const BFloat16Impl& lhs, const BFloat16Impl& rhs) noexcept { // IEEE defines that positive and negative zero are equal, this gives us a quick equality check // for two values by or'ing the private bits together and stripping the sign. 
They are both zero, @@ -520,14 +528,17 @@ struct BFloat16Impl { }; template -inline uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept +GPUdi() uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept { uint16_t result; - if (std::isnan(v)) { + if (o2::gpu::CAMath::IsNaN(v)) { result = kPositiveQNaNBits; } else { auto get_msb_half = [](float fl) { uint16_t result; +#ifdef GPUCA_GPUCODE + o2::gpu::CAMath::memcpy(&result, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); +#else #ifdef __cpp_if_constexpr if constexpr (detail::endian::native == detail::endian::little) #else @@ -538,6 +549,7 @@ inline uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept } else { std::memcpy(&result, &fl, sizeof(uint16_t)); } +#endif return result; }; @@ -554,14 +566,18 @@ inline uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept } template -inline float BFloat16Impl::ToFloatImpl() const noexcept +GPUdi() float BFloat16Impl::ToFloatImpl() const noexcept { if (IsNaN()) { - return std::numeric_limits::quiet_NaN(); + return o2::gpu::CAMath::QuietNaN(); } float result; char* const first = reinterpret_cast(&result); char* const second = first + sizeof(uint16_t); +#ifdef GPUCA_GPUCODE + first[0] = first[1] = 0; + o2::gpu::CAMath::memcpy(second, &val, sizeof(uint16_t)); +#else #ifdef __cpp_if_constexpr if constexpr (detail::endian::native == detail::endian::little) #else @@ -574,6 +590,7 @@ inline float BFloat16Impl::ToFloatImpl() const noexcept std::memcpy(first, &val, sizeof(uint16_t)); std::memset(second, 0, sizeof(uint16_t)); } +#endif return result; } @@ -610,26 +627,26 @@ struct Float16_t : OrtDataType::Float16Impl { /// /// Default constructor /// - Float16_t() = default; + GPUdDefault() Float16_t() = default; /// /// Explicit conversion to uint16_t representation of float16. 
/// /// uint16_t bit representation of float16 /// new instance of Float16_t - constexpr static Float16_t FromBits(uint16_t v) noexcept { return Float16_t(v); } + GPUd() constexpr static Float16_t FromBits(uint16_t v) noexcept { return Float16_t(v); } /// /// __ctor from float. Float is converted into float16 16-bit representation. /// /// float value - explicit Float16_t(float v) noexcept { val = Base::ToUint16Impl(v); } + GPUd() explicit Float16_t(float v) noexcept { val = Base::ToUint16Impl(v); } /// /// Converts float16 to float /// /// float representation of float16 value - float ToFloat() const noexcept { return Base::ToFloatImpl(); } + GPUd() float ToFloat() const noexcept { return Base::ToFloatImpl(); } /// /// Checks if the value is negative @@ -710,7 +727,7 @@ struct Float16_t : OrtDataType::Float16Impl { /// /// User defined conversion operator. Converts Float16_t to float. /// - explicit operator float() const noexcept { return ToFloat(); } + GPUdi() explicit operator float() const noexcept { return ToFloat(); } using Base::operator==; using Base::operator!=; @@ -751,26 +768,26 @@ struct BFloat16_t : OrtDataType::BFloat16Impl { public: using Base = OrtDataType::BFloat16Impl; - BFloat16_t() = default; + GPUdDefault() BFloat16_t() = default; /// /// Explicit conversion to uint16_t representation of bfloat16. /// /// uint16_t bit representation of bfloat16 /// new instance of BFloat16_t - static constexpr BFloat16_t FromBits(uint16_t v) noexcept { return BFloat16_t(v); } + GPUd() static constexpr BFloat16_t FromBits(uint16_t v) noexcept { return BFloat16_t(v); } /// /// __ctor from float. Float is converted into bfloat16 16-bit representation. 
/// /// float value - explicit BFloat16_t(float v) noexcept { val = Base::ToUint16Impl(v); } + GPUd() explicit BFloat16_t(float v) noexcept { val = Base::ToUint16Impl(v); } /// /// Converts bfloat16 to float /// /// float representation of bfloat16 value - float ToFloat() const noexcept { return Base::ToFloatImpl(); } + GPUd() float ToFloat() const noexcept { return Base::ToFloatImpl(); } /// /// Checks if the value is negative @@ -851,7 +868,7 @@ struct BFloat16_t : OrtDataType::BFloat16Impl { /// /// User defined conversion operator. Converts BFloat16_t to float. /// - explicit operator float() const noexcept { return ToFloat(); } + GPUdi() explicit operator float() const noexcept { return ToFloat(); } // We do not have an inherited impl for the below operators // as the internal class implements them a little differently @@ -864,4 +881,5 @@ static_assert(sizeof(BFloat16_t) == sizeof(uint16_t), "Sizes must match"); } // namespace OrtDataType -} // namespace o2 \ No newline at end of file +} // namespace o2 +#endif \ No newline at end of file From 650a6ff5d636a634a13bb7b8e20f0ec30d0f1f13 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 11:41:02 +0100 Subject: [PATCH 0060/1764] GPU TPC: Simplify host code for driving the extrapolation tracking --- GPU/GPUTracking/Base/GPUReconstruction.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 2 +- GPU/GPUTracking/Global/GPUChainTracking.h | 4 +- .../Global/GPUChainTrackingSectorTracker.cxx | 116 +++++------------- 4 files changed, 33 insertions(+), 91 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index a0248180a5e2c..93310284d7564 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -200,7 +200,7 @@ class GPUReconstruction void SetOutputControl(void* ptr, size_t size); void SetInputControl(void* ptr, size_t size); GPUOutputControl& OutputControl() { return mOutputControl; } - int32_t 
NStreams() const { return mNStreams; } + uint32_t NStreams() const { return mNStreams; } const void* DeviceMemoryBase() const { return mDeviceMemoryBase; } RecoStepField GetRecoSteps() const { return mRecoSteps.steps; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index c186f916891ba..6dcb6f1d7e514 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -260,7 +260,7 @@ bool GPUChainTracking::ValidateSettings() GPUError("configured max time bin exceeds 256 orbits"); return false; } - if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 1 : 0) > mRec->NStreams()) { + if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 
1 : 0) > (int32_t)mRec->NStreams()) { GPUError("NStreams (%d) must be > nTPCClustererLanes (%d)", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); return false; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 194573981838e..492ee65d1c9c1 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -220,7 +220,7 @@ class GPUChainTracking : public GPUChain GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS); - int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true); + int32_t ExtrapolationTracking(uint32_t iSector, bool blocking); int32_t PrepareProfile(); int32_t DoProfile(); @@ -278,7 +278,6 @@ class GPUChainTracking : public GPUChain // Synchronization and Locks eventStruct* mEvents = nullptr; - volatile int32_t mSectorSelectorReady = 0; std::array mExtrapolationTrackingDone; std::vector mOutputQueue; @@ -299,6 +298,7 @@ class GPUChainTracking : public GPUChain void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType); void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts); bool NeedTPCClustersOnGPU(); + uint32_t StreamForSector(uint32_t sector) const; std::mutex mMutexUpdateCalib; std::unique_ptr mPipelineFinalizationCtx; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 522ccbad47e59..dd71a797f2744 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -24,12 +24,18 @@ using namespace o2::gpu; -int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput) +uint32_t 
GPUChainTracking::StreamForSector(uint32_t sector) const { - runKernel({GetGridBlk(256, iSector % mRec->NStreams()), {iSector}}); - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), iSector % mRec->NStreams()); - if (synchronizeOutput) { - SynchronizeStream(iSector % mRec->NStreams()); + return sector % mRec->NStreams(); +} + +int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, bool blocking) +{ + const uint32_t stream = StreamForSector(iSector); + runKernel({GetGridBlk(256, stream), {iSector}}); + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), stream); + if (blocking) { + SynchronizeStream(stream); } return (0); } @@ -153,7 +159,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { GPUTPCTracker& trk = processors()->tpcTrackers[iSector]; GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSector] : trk; - int32_t useStream = (iSector % mRec->NStreams()); + int32_t useStream = StreamForSector(iSector); if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Creating Sector Data (Sector %d)", iSector); @@ -234,102 +240,38 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } if (doGPU || GetProcessingSettings().debugLevel >= 1) { - if (doGPU) { - ReleaseEvent(mEvents->init); - } - - mSectorSelectorReady = 0; - - std::array transferRunning; - transferRunning.fill(true); - if (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)) { // TODO: This seems pretty obsolete code path, can probably be removed. 
- if (param().rec.tpc.extrapolationTracking) { - mExtrapolationTrackingDone.fill(0); - } - - uint32_t tmpSector = 0; - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Transfering Tracks from GPU to Host"); - } - - if (tmpSector == iSector) { - SynchronizeEvents(&mEvents->sector[iSector]); - } - while (tmpSector < NSECTORS && (tmpSector == iSector || IsEventDone(&mEvents->sector[tmpSector]))) { - ReleaseEvent(mEvents->sector[tmpSector]); - if (*processors()->tpcTrackers[tmpSector].NTracks() > 0) { - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[tmpSector].MemoryResOutput(), streamMap[tmpSector], &mEvents->sector[tmpSector]); - } else { - transferRunning[tmpSector] = false; - } - tmpSector++; - } - - if (GetProcessingSettings().keepAllMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true); - } - - if (transferRunning[iSector]) { - SynchronizeEvents(&mEvents->sector[iSector]); - } - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Tracks Transfered: %d / %d", *processors()->tpcTrackers[iSector].NTracks(), *processors()->tpcTrackers[iSector].NTrackHits()); - } - - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Data ready for sector %d", iSector); - } - mSectorSelectorReady = iSector; - - if (param().rec.tpc.extrapolationTracking) { - for (uint32_t tmpSector2a = 0; tmpSector2a <= iSector; tmpSector2a++) { - uint32_t tmpSector2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(tmpSector2a); - uint32_t sectorLeft, sectorRight; - GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight); - - if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mExtrapolationTrackingDone[tmpSector2] == 0) { - ExtrapolationTracking(tmpSector2, 0); - mExtrapolationTrackingDone[tmpSector2] = 1; - } - } - } - } - } 
if (param().rec.tpc.extrapolationTracking) { std::vector blocking(NSECTORS * mRec->NStreams()); - for (int32_t i = 0; i < NSECTORS; i++) { - for (int32_t j = 0; j < mRec->NStreams(); j++) { - blocking[i * mRec->NStreams() + j] = i % mRec->NStreams() == j; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (uint32_t iStream = 0; iStream < mRec->NStreams(); iStream++) { + blocking[iSector * mRec->NStreams() + iStream] = StreamForSector(iSector) == iStream; } } for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector); - if (!(doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { - uint32_t sectorLeft, sectorRight; - GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); - if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) { - StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorLeft]); - blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()] = true; - } - if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()]) { - StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorRight]); - blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()] = true; - } + uint32_t sectorLeft, sectorRight; + GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); + if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)]) { + StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorLeft]); + blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)] = true; + } + if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)]) { + StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorRight]); + blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)] = 
true; } - ExtrapolationTracking(tmpSector, 0, false); + ExtrapolationTracking(tmpSector, false); } } - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - if (doGPU && transferRunning[iSector]) { + if (doGPU) { + ReleaseEvent(mEvents->init); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { ReleaseEvent(mEvents->sector[iSector]); } } } else { - mSectorSelectorReady = NSECTORS; mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { if (param().rec.tpc.extrapolationTracking) { - ExtrapolationTracking(iSector, 0); + ExtrapolationTracking(iSector, true); } }); mRec->SetNActiveThreadsOuterLoop(1); From 705ebfb083c41183183c554c0cb17a6a9423e4c5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 09:05:24 +0100 Subject: [PATCH 0061/1764] GPU TPC CF: Split toNative back to finalize and toNative, to be used by NNClusterer --- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx | 9 +++++++-- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h | 3 ++- GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc | 3 ++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index 77dc6e119df7d..b6792ce3a9ef5 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -58,7 +58,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d) return q; } -GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param, TPCTime timeOffset, const Array2D& chargeMap) +GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, const Charge q, TPCTime timeOffset) { mQtot += q; @@ -73,8 +73,13 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::Cl Pad pad = pos.pad(); mPadMean += pad; mTimeMean += timeOffset + pos.time(); +} + +GPUd() bool 
ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, tpc::ClusterNative& cn, const GPUParam& param, const Array2D& chargeMap) +{ + Pad pad = pos.pad(); - bool isEdgeCluster = pos.pad() < 2 || pos.pad() >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge + bool isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge if (isEdgeCluster) { bool leftEdge = (pad < 2); if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index b8c2bd81d8b37..f0c6ac47f3c8a 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -41,7 +41,8 @@ class ClusterAccumulator GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2); GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2); - GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&, tpccf::TPCTime, const Array2D&); + GPUd() void finalize(const ChargePos&, const tpccf::Charge, tpccf::TPCTime); + GPUd() bool toNative(const ChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const Array2D&); private: float mQtot = 0; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc index c6d7a3b68c0a8..8a6b73be8bd8d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc @@ -60,7 +60,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t return; } tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), 
fragment.start, chargeMap); + pc.finalize(pos, charge, fragment.start); + bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), chargeMap); if (rejectCluster) { if (clusterPosInRow) { From b183c5586b9b9cf2aa9a5b1c91a06589e5577690 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 14:52:24 +0100 Subject: [PATCH 0062/1764] GPU CMake: Auto-detect ROCm even if CMake prefix path not set --- dependencies/FindO2GPU.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 57c820fbe86b1..f8d41c032078f 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -198,7 +198,10 @@ endif() # Detect and enable HIP if(ENABLE_HIP) - if("$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm") + if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/") + list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake") + endif() + if("$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" OR CMAKE_PREFIX_PATH MATCHES "rocm") set(CMAKE_HIP_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_HIP_STANDARD_REQUIRED TRUE) if(HIP_AMDGPUTARGET) @@ -206,7 +209,7 @@ if(ENABLE_HIP) set(GPU_TARGETS "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) endif() - set(TMP_ROCM_DIR_LIST $ENV{CMAKE_PREFIX_PATH}) + set(TMP_ROCM_DIR_LIST "${CMAKE_PREFIX_PATH}:$ENV{CMAKE_PREFIX_PATH}") string(REPLACE ":" ";" TMP_ROCM_DIR_LIST "${TMP_ROCM_DIR_LIST}") list(FILTER TMP_ROCM_DIR_LIST INCLUDE REGEX rocm) list(POP_FRONT TMP_ROCM_DIR_LIST TMP_ROCM_DIR) From 2626074691611ab3b40693eea263fc27808fa556 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 15:00:35 +0100 Subject: [PATCH 0063/1764] GPU Standalone: fix prepare script using O2 env modules --- GPU/GPUTracking/Standalone/cmake/prepare.sh | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Standalone/cmake/prepare.sh b/GPU/GPUTracking/Standalone/cmake/prepare.sh index 826cdb5efb56c..17474b5fc6956 100755 --- a/GPU/GPUTracking/Standalone/cmake/prepare.sh +++ b/GPU/GPUTracking/Standalone/cmake/prepare.sh @@ -10,7 +10,7 @@ else WORK_DIR="$ALIBUILD_WORK_DIR" fi eval "`alienv shell-helper`" -alienv load O2/latest -for i in Vc boost fmt CMake ms_gsl Clang ninja; do +# alienv load O2/latest +for i in Vc boost fmt CMake ms_gsl Clang ninja TBB ROOT; do source sw/$ALIARCH/$i/latest/etc/profile.d/init.sh done From b5ab60d021e934b92f335b6267f0891f098e4a65 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Fri, 14 Mar 2025 14:33:16 +0100 Subject: [PATCH 0064/1764] GPU clusterizer with neural networks (#13981) * Copying kernels to implement NN clusterizer * First version of clusterizer in GPU code * Adding a compiling and running version with single-threaded ONNX model executions. Clusters are not getting published yet (FIXME) * Clusters now working by a hack * Working implementation of settings via GPUSettings.h and --configKeyValues "GPU_proc.[setting]=...;..." 
* Modifying the onnx_interface to include the right headers * Adjusting initialization for new ONNXRuntime version * Adjusting global settings and CF code for several settings * Adding return statement if cluster is rejected * Adding some statements back * Update to latest status of gpu clusterization * Fixing uchar -> uint8_t * Adding utils header * Updating kernels.cmake to uint8_t * Please consider the following formatting changes * Adding an ONNX CPU library in the O2 framework * Please consider the following formatting changes * Fixing macOS build issues with calling O*.data() * Fixing compiler issues and char -> uint8_t * Fixing curly braces * Fixing std::make_shared * Changing order for * Bug-fixing file name * Making NN clusterizer more efficient * Changing constexpr * Fixing build issues * Major changes to make clusterizer parallelizable. Problem remains: different sizes of nnClusterizerBatchedMode lead to different number of clusters if nnClusterizerBatchedMode < clusterer.mPmemory->counters.nClusters * Adjusting for default CF regression * Bug-fix for application of CF regression and logging message * Adding is_boundary check earlier to avoid out-of-bounds access * Bug-fixes for boundary reading * Updating to use explicit calls to kernels instead of if-statements * Bug-fix for class label application * Explicit casting solves regression issues. 
To be done: Correct publishing for class2 regression * Bug-fixes * Adding some documentation * Please consider the following formatting changes * Modifying for Davids comments * Modifications from comments on PR * Please consider the following formatting changes * iSlice -> iSector * mISlice -> mISector * Minor bug-fixes * Adjusting for comments * Bug-fix for fullCI build * Adding GPUd() for on-device functions * Fixing compile issues, only thing mssing: conversion of float to float16 * Let's see if this does the trick * Making functions (constructors) GPUd() (GPUdDefault()) * GPU kernels should now be findable * Adding ifdefs for standalone build and header exclusions in GPUORTFloat16 * Modifying the approach to not use std:: types. Still needs to be tested and need to do proper memory allocation * New version of clusterizer. Compiles locally, but segfaults in fillInput kernel. Testing with the CI now. * Please consider the following formatting changes * Adjust for comments * Please consider the following formatting changes * Merging dev and adjusting build issues * Adjusting for comments * Fixing incorrect #endif * Please consider the following formatting changes * Fix indentation, remove duplicate define * Fixing one memory issue. Segfault / memory leak persists * Adjusting for new toNative function * Fixing .finalize * Adjusting CMakeLIsts and other bugs * Adding GPUCA_HAS_ONNX only to tracking * Changing to fixed size for number of clusters * Fixed segfault. Not producing the right number of clusters yet. * Network now accepts clusters over all sectors * Whitespaces... 
* Some weird formatting * Please consider the following formatting changes * Removing white-spaces * Adding necessary if-statement to avoid automatic model loading * Removing GPUConstantMem, adding interOpNumThreads option * Found the bug where I loose clusters * Editor configured for whitespaces at EOF --------- Co-authored-by: ALICE Action Bot Co-authored-by: David Rohr --- Common/ML/include/ML/OrtInterface.h | 9 +- Common/ML/src/OrtInterface.cxx | 164 +++----- GPU/GPUTracking/Base/GPUConstantMem.h | 10 +- GPU/GPUTracking/Base/GPUMemoryResource.h | 1 + GPU/GPUTracking/Base/GPUReconstruction.cxx | 3 + GPU/GPUTracking/CMakeLists.txt | 12 +- .../Definitions/GPUDefGPUParameters.h | 13 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 30 ++ .../GPUTrackingLinkDef_O2_DataTypes.h | 1 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 6 + .../Global/GPUChainTrackingClusterizer.cxx | 112 ++++- GPU/GPUTracking/TPCClusterFinder/ChargePos.h | 1 + .../TPCClusterFinder/ClusterAccumulator.h | 11 + .../TPCClusterFinder/GPUTPCNNClusterizer.cxx | 55 +++ .../TPCClusterFinder/GPUTPCNNClusterizer.h | 77 ++++ .../GPUTPCNNClusterizerHost.cxx | 65 +++ .../GPUTPCNNClusterizerHost.h | 68 +++ .../GPUTPCNNClusterizerKernels.cxx | 386 ++++++++++++++++++ .../GPUTPCNNClusterizerKernels.h | 77 ++++ GPU/GPUTracking/kernels.cmake | 13 +- 20 files changed, 1002 insertions(+), 112 deletions(-) create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index 89631d59a3846..93549178848ca 100644 --- 
a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -41,6 +41,7 @@ class OrtModel OrtModel(std::unordered_map optionsMap) { reset(optionsMap); } void init(std::unordered_map optionsMap) { reset(optionsMap); } void reset(std::unordered_map); + bool isInitialized() { return mInitialized; } virtual ~OrtModel() = default; @@ -55,6 +56,9 @@ class OrtModel template // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h std::vector inference(std::vector>&); + template // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h + void inference(I*, size_t, O*); + // template // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type // std::vector inference(std::vector&); @@ -79,8 +83,9 @@ class OrtModel std::vector> mInputShapes, mOutputShapes; // Environment settings - std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda - int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; + bool mInitialized = false; + std::string modelPath, device = "cpu", dtype = "float", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda + int intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; std::string printShape(const std::vector&); }; diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index eb124ff6f12c9..fc784dd14d2dc 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -44,17 +44,20 @@ void OrtModel::reset(std::unordered_map optionsMap) if (!optionsMap.contains("model-path")) { LOG(fatal) << "(ORT) 
Model path cannot be empty!"; } - modelPath = optionsMap["model-path"]; - device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU"); - dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float"); - deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0); - allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); - intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); - loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 2); - enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); - enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0); - - std::string dev_mem_str = "Hip"; + + if (!optionsMap["model-path"].empty()) { + modelPath = optionsMap["model-path"]; + device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU"); + dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float"); + deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0); + allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); + intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); + interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0); + loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0); + enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); + enableOptimizations = (optionsMap.contains("enable-optimizations") ? 
std::stoi(optionsMap["enable-optimizations"]) : 0); + + std::string dev_mem_str = "Hip"; #if defined(ORT_ROCM_BUILD) #if ORT_ROCM_BUILD == 1 if (device == "ROCM") { @@ -88,12 +91,15 @@ void OrtModel::reset(std::unordered_map optionsMap) if (device == "CPU") { (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads); - if (intraOpNumThreads > 1) { + (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads); + if (intraOpNumThreads > 1 || interOpNumThreads > 1) { (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); } else if (intraOpNumThreads == 1) { (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); } - LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads"; + if (loggingLevel < 2) { + LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads"; + } } (pImplOrt->sessionOptions).DisableMemPattern(); @@ -109,6 +115,9 @@ void OrtModel::reset(std::unordered_map optionsMap) } else { (pImplOrt->sessionOptions).DisableProfiling(); } + + mInitialized = true; + (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations)); (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel)); @@ -154,16 +163,9 @@ void OrtModel::reset(std::unordered_map optionsMap) outputNamesChar.resize(mOutputNames.size(), nullptr); std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar), [&](const std::string& str) { return str.c_str(); }); - - // Print names - LOG(info) << "\tInput Nodes:"; - for (size_t i = 0; i < mInputNames.size(); i++) { - LOG(info) << "\t\t" << mInputNames[i] << " : " << printShape(mInputShapes[i]); } - - LOG(info) << "\tOutput Nodes:"; - for (size_t i = 0; i < mOutputNames.size(); i++) { - LOG(info) << "\t\t" << mOutputNames[i] << " : " << printShape(mOutputShapes[i]); + if (loggingLevel < 
2) { + LOG(info) << "(ORT) Model loaded successfully! (input: " << printShape(mInputShapes[0]) << ", output: " << printShape(mOutputShapes[0]) << ")"; } } @@ -187,36 +189,6 @@ std::vector OrtModel::v2v(std::vector& input, bool clearInput) } } -template // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h -std::vector OrtModel::inference(std::vector& input) -{ - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - O* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; -} - -template // class I is the input data type, e.g. float, class O is the output data type, e.g. 
O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h -std::vector OrtModel::inference(std::vector>& input) -{ - std::vector inputTensor; - for (auto i : input) { - std::vector inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(i.data()), i.size(), inputShape.data(), inputShape.size())); - } - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - O* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputTensor.size() / mInputShapes[0][1] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; -} - std::string OrtModel::printShape(const std::vector& v) { std::stringstream ss(""); @@ -227,74 +199,68 @@ std::string OrtModel::printShape(const std::vector& v) return ss.str(); } -template <> -std::vector OrtModel::inference(std::vector& input) +template +std::vector OrtModel::inference(std::vector& input) { std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size())); + if constexpr (std::is_same_v) { + inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); + } else { + inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size())); + } // input.clear(); auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), 
outputNamesChar.data(), outputNamesChar.size()); - float* outputValues = outputTensors[0].template GetTensorMutableData(); - std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; + O* outputValues = outputTensors[0].template GetTensorMutableData(); + std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; outputTensors.clear(); return outputValuesVec; } -template <> -std::vector OrtModel::inference(std::vector& input) -{ - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - float* outputValues = outputTensors[0].template GetTensorMutableData(); - std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; -} +template std::vector OrtModel::inference(std::vector&); -template <> -std::vector OrtModel::inference(std::vector& input) -{ - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - OrtDataType::Float16_t* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, 
outputValues + inputShape[0] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; -} +template std::vector OrtModel::inference(std::vector&); -template <> -std::vector OrtModel::inference(std::vector& input) +template std::vector OrtModel::inference(std::vector&); + +template +void OrtModel::inference(I* input, size_t input_size, O* output) { - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - OrtDataType::Float16_t* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; + std::vector inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; + Ort::Value inputTensor = Ort::Value(nullptr); + if constexpr (std::is_same_v) { + inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input), input_size, inputShape.data(), inputShape.size()); + } else { + inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, input, input_size, inputShape.data(), inputShape.size()); + } + + std::vector outputShape{inputShape[0], mOutputShapes[0][1]}; + size_t outputSize = (int64_t)(input_size * mOutputShapes[0][1] / mInputShapes[0][1]); + Ort::Value outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, output, outputSize, outputShape.data(), outputShape.size()); + + (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, 
outputNamesChar.size()); // TODO: Not sure if 1 is correct here } -template <> -std::vector OrtModel::inference(std::vector>& input) +template void OrtModel::inference(OrtDataType::Float16_t*, size_t, float*); + +template void OrtModel::inference(float*, size_t, float*); + +template +std::vector OrtModel::inference(std::vector>& input) { std::vector inputTensor; for (auto i : input) { std::vector inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(i.data()), i.size(), inputShape.data(), inputShape.size())); + if constexpr (std::is_same_v) { + inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(i.data()), i.size(), inputShape.data(), inputShape.size())); + } else { + inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, i.data(), i.size(), inputShape.data(), inputShape.size())); + } } // input.clear(); auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - OrtDataType::Float16_t* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputTensor.size() / mInputShapes[0][1] * mOutputShapes[0][1]}; + O* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); + std::vector outputValuesVec{outputValues, outputValues + inputTensor.size() / mInputShapes[0][1] * mOutputShapes[0][1]}; outputTensors.clear(); return outputValuesVec; } diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 4f83fa48a64e0..8f1cc90f5ae93 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -34,12 +34,15 @@ #include "GPUKernelDebugOutput.h" #endif +#ifdef GPUCA_HAS_ONNX +#include 
"GPUTPCNNClusterizer.h" +#endif + namespace o2::gpu { struct GPUConstantMem { GPUParam param; - GPUTPCTracker - tpcTrackers[GPUCA_NSECTORS]; + GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]; GPUTPCConvert tpcConverter; GPUTPCCompression tpcCompressor; GPUTPCDecompression tpcDecompressor; @@ -55,6 +58,9 @@ struct GPUConstantMem { #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT GPUKernelDebugOutput debugOutput; #endif +#ifdef GPUCA_HAS_ONNX + GPUTPCNNClusterizer tpcNNClusterer[GPUCA_NSECTORS]; +#endif template GPUd() auto& getTRDTracker(); diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.h b/GPU/GPUTracking/Base/GPUMemoryResource.h index 3bb2c363db2a9..06e350db0bfc7 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.h +++ b/GPU/GPUTracking/Base/GPUMemoryResource.h @@ -28,6 +28,7 @@ struct GPUMemoryReuse { }; enum Group : uint16_t { ClustererScratch, + NNClusterer, ClustererZS, TrackerScratch, TrackerDataLinks, diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 656fa37fb6a4c..df9a7380834ce 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -93,6 +93,9 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos for (uint32_t i = 0; i < NSECTORS; i++) { processors()->tpcTrackers[i].SetSector(i); // TODO: Move to a better place processors()->tpcClusterer[i].mISector = i; +#ifdef GPUCA_HAS_ONNX + processors()->tpcNNClusterer[i].mISector = i; +#endif } #ifndef GPUCA_NO_ROOT mROOTDump = GPUROOTDumpCore::getAndCreate(); diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index c97742ac1d47f..d5a90dbd65ea3 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -159,8 +159,8 @@ set(HDRS_INSTALL ) set(SRCS_NO_CINT ${SRCS_NO_CINT} display/GPUDisplayInterface.cxx) -set(SRCS_NO_CINT - ${SRCS_NO_CINT} + +set(SRCS_NO_CINT ${SRCS_NO_CINT} Global/GPUChainITS.cxx ITS/GPUITSFitter.cxx ITS/GPUITSFitterKernels.cxx 
@@ -191,6 +191,10 @@ set(SRCS_NO_CINT Refit/GPUTrackingRefitKernel.cxx Merger/GPUTPCGMO2Output.cxx) +if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") + list(APPEND SRCS_NO_CINT TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx TPCClusterFinder/GPUTPCNNClusterizer.cxx TPCClusterFinder/GPUTPCNNClusterizerHost.cxx) +endif() + set(SRCS_DATATYPES ${SRCS_DATATYPES} DataTypes/TPCPadGainCalib.cxx @@ -273,6 +277,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") PRIVATE_LINK_LIBRARIES O2::DataFormatsTPC SOURCES ${SRCS_DATATYPES}) target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) + o2_target_root_dictionary(GPUDataTypes HEADERS ${HDRS_CINT_DATATYPES} ${HDRS_CINT_O2_ADDITIONAL} LINKDEF GPUTrackingLinkDef_O2_DataTypes.h) @@ -292,6 +297,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::TPCFastTransformation O2::DetectorsRaw O2::Steer + O2::ML PUBLIC_INCLUDE_DIRECTORIES . Definitions DataTypes @@ -317,7 +323,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${targetName} PRIVATE $) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) + target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX=1) o2_target_root_dictionary(${MODULE} HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL} diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 3ed6c25762405..55f2e76344bd5 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -477,6 +477,9 @@ #ifndef GPUCA_LB_GPUTPCCFClusterizer #define GPUCA_LB_GPUTPCCFClusterizer 512 #endif + #ifndef GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels 512 + #endif #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 256 #endif @@ -495,6 +498,16 @@ #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression #define 
GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression + +#ifdef GPUCA_HAS_ONNX +#define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels +#endif + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9b6be7743e485..63fcf51004eae 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -222,6 +222,35 @@ AddOption(tpcTriggerHandling, bool, true, "", 0, "Enable TPC trigger handling") AddHelp("help", 'h') EndConfig() +BeginSubConfig(GPUSettingsProcessingNNclusterizer, nn, configStandalone.proc, "NN", 0, "Processing settings for neural network clusterizer", proc_nn) +AddOption(applyNNclusterizer, int, 0, "", 0, "(bool, default = 0), if the neural network clusterizer should be used.") +AddOption(nnInferenceDevice, std::string, "CPU", "", 0, "(std::string) Specify inference device (cpu (default), rocm, cuda)") +AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify inference device id") +AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0), if the device memory should be allocated for inference") +AddOption(nnInferenceDtype, 
std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16 +AddOption(nnInferenceIntraOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetIntraOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.") +AddOption(nnInferenceInterOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetInterOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.") +AddOption(nnInferenceEnableOrtOptimization, unsigned int, 99, "", 0, "Enables graph optimizations in ONNX Runtime. Can be [0, 1, 2, 99] -> see https://github.com/microsoft/onnxruntime/blob/3f71d637a83dc3540753a8bb06740f67e926dc13/include/onnxruntime/core/session/onnxruntime_c_api.h#L347") +AddOption(nnInferenceOrtProfiling, int, 0, "", 0, "Enables profiling of model execution in ONNX Runtime") +AddOption(nnInferenceOrtProfilingPath, std::string, ".", "", 0, "If nnInferenceOrtProfiling is set, the path to store the profiling data") +AddOption(nnInferenceVerbosity, int, 1, "", 0, "0: No messages; 1: Warnings; 2: Warnings + major debugs; >3: All debugs") +AddOption(nnClusterizerAddIndexData, int, 1, "", 0, "If normalized index data (sector, row, pad), should be appended to the input") +AddOption(nnClusterizerSizeInputRow, int, 3, "", 0, "Size of the input to the NN (currently calcualted as (length-1)/2") +AddOption(nnClusterizerSizeInputPad, int, 3, "", 0, "Size of the input to the NN (currently calcualted as (length-1)/2") +AddOption(nnClusterizerSizeInputTime, int, 3, "", 0, "Size of the input to the NN (currently calcualted as (length-1)/2") +AddOption(nnClusterizerUseCfRegression, int, 0, "", 0, "(bool, default = false) If true, use the regression from the native clusterizer and not the NN") +AddOption(nnClusterizerApplyCfDeconvolution, int, 0, "", 0, "Applies the CFDeconvolution kernel before the digits to the network are filled") 
+AddOption(nnClusterizerBatchedMode, unsigned int, 1, "", 0, "(int, default = 1) If >1, the NN is evaluated on batched input of size specified in this variable") +AddOption(nnClusterizerVerbosity, int, -1, "", 0, "(int, default = -1) If >0, logging messages of the clusterizer will be displayed") +AddOption(nnClusterizerBoundaryFillValue, int, -1, "", 0, "Fill value for the boundary of the input to the NN") +AddOption(nnClusterizerApplyNoiseSuppression, int, 1, "", 0, "Applies the NoiseSuppression kernel before the digits to the network are filled") +AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The classification network path") +AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.") +AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path") +AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).") +AddHelp("help", 'h') +EndConfig() + BeginSubConfig(GPUSettingsProcessing, proc, configStandalone, "PROC", 0, "Processing settings", proc) AddOption(deviceNum, int32_t, -1, "gpuDevice", 0, "Set GPU device to use (-1: automatic, -2: for round-robin usage in timeslice-pipeline)") AddOption(gpuDeviceOnly, bool, false, "", 0, "Use only GPU as device (i.e. 
no CPU for OpenCL)") @@ -299,6 +328,7 @@ AddOption(printSettings, bool, false, "", 0, "Print all settings when initializi AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) AddSubConfig(GPUSettingsProcessingParam, param) +AddSubConfig(GPUSettingsProcessingNNclusterizer, nn) AddHelp("help", 'h') EndConfig() #endif // __OPENCL__ diff --git a/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h b/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h index ab60827655a43..35ebbabe41672 100644 --- a/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h +++ b/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h @@ -30,6 +30,7 @@ #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsProcessing + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsProcessingParam + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsProcessingRTC + ; +#pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsProcessingNNclusterizer + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsDisplay + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsDisplayLight + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsDisplayHeavy + ; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 6dcb6f1d7e514..37ad164d20a60 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -103,6 +103,9 @@ void GPUChainTracking::RegisterPermanentMemoryAndProcessors() if (GetRecoSteps() & RecoStep::TPCClusterFinding) { for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUProcessor(&processors()->tpcClusterer[i], GetRecoStepsGPU() & RecoStep::TPCClusterFinding); +#ifdef GPUCA_HAS_ONNX + mRec->RegisterGPUProcessor(&processors()->tpcNNClusterer[i], GetRecoStepsGPU() & RecoStep::TPCClusterFinding); +#endif } } if (GetRecoSteps() & RecoStep::Refit) { @@ -148,6 +151,9 @@ void 
GPUChainTracking::RegisterGPUProcessors() if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) { for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcClusterer[i], &processors()->tpcClusterer[i]); +#ifdef GPUCA_HAS_ONNX + mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcNNClusterer[i], &processors()->tpcNNClusterer[i]); +#endif } } if (GetRecoStepsGPU() & RecoStep::Refit) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 62a4a524d67df..63d56da37595b 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -39,6 +39,11 @@ #include #endif +#ifdef GPUCA_HAS_ONNX +#include "GPUTPCNNClusterizerKernels.h" +#include "GPUTPCNNClusterizerHost.h" +#endif + using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; @@ -149,7 +154,8 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS]; memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples)); bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? 
o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) + : 0; for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { #ifndef GPUCA_NO_VC @@ -606,6 +612,41 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline) } +#ifdef GPUCA_HAS_ONNX + if (GetProcessingSettings().nn.applyNNclusterizer) { + uint32_t maxClusters = -1; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + maxClusters = std::max(maxClusters, processors()->tpcClusterer[iSector].mNMaxClusters); + } + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector]; + const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; + clustererNN.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression; + clustererNN.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow; + clustererNN.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad; + clustererNN.nnClusterizerSizeInputTime = nn_settings.nnClusterizerSizeInputTime; + clustererNN.nnClusterizerAddIndexData = nn_settings.nnClusterizerAddIndexData; + clustererNN.nnClusterizerElementSize = ((2 * nn_settings.nnClusterizerSizeInputRow + 1) * (2 * nn_settings.nnClusterizerSizeInputPad + 1) * (2 * nn_settings.nnClusterizerSizeInputTime + 1)) + (nn_settings.nnClusterizerAddIndexData ? 
3 : 0); + clustererNN.nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode; + clustererNN.nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue; + clustererNN.nnClusterizerTotalClusters = maxClusters; + clustererNN.nnClassThreshold = nn_settings.nnClassThreshold; + clustererNN.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold; + if (clustererNN.nnSigmoidTrafoClassThreshold) { + clustererNN.nnClassThreshold = (float)std::log(clustererNN.nnClassThreshold / (1.f - clustererNN.nnClassThreshold)); + } + if (nn_settings.nnClusterizerVerbosity < 0) { + clustererNN.nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity; + } else { + clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity; + } + clustererNN.nnClusterizerDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos; + GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN); + AllocateRegisteredMemory(clustererNN.mMemoryId); + } + } +#endif + if (doGPU && mIOPtrs.tpcZS) { processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta; WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1); @@ -854,6 +895,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) uint32_t iSector = iSectorBase + lane; GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSector] : clusterer; + if (doGPU) { SynchronizeStream(lane); } @@ -871,17 +913,77 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) return; } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + if (GetProcessingSettings().nn.applyNNclusterizer) { +#ifdef GPUCA_HAS_ONNX + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector]; + const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; + GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN); + + if (clustererNN.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + } + + float time_clusterizer = 0, time_fill = 0; + for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNN.nnClusterizerBatchedMode); batch++) { + uint batchStart = batch * clustererNN.nnClusterizerBatchedMode; + size_t iSize = CAMath::Min((uint)clustererNN.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); + + auto start0 = std::chrono::high_resolution_clock::now(); + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Filling the data + + auto stop0 = std::chrono::high_resolution_clock::now(); + auto start1 = std::chrono::high_resolution_clock::now(); + nnApplication.networkInference(nnApplication.model_class, clustererNN, iSize, clustererNN.modelProbabilities, clustererNN.nnClusterizerDtype); + if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) { + runKernel({GetGrid(iSize, lane), 
krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels + } else { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels + } + + if (!clustererNN.nnClusterizerUseCfRegression) { + nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNN.outputDataReg1, clustererNN.nnClusterizerDtype); + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 1 + if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.reg_model_paths.size() > 1) { + nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNN.outputDataReg2, clustererNN.nnClusterizerDtype); + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 2 + } + } + auto stop1 = std::chrono::high_resolution_clock::now(); + + time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; + time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; + } + auto start1 = std::chrono::high_resolution_clock::now(); + if (clustererNN.nnClusterizerUseCfRegression) { + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 + } + auto stop1 = std::chrono::high_resolution_clock::now(); + time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; + if (clustererNN.nnClusterizerVerbosity < 3) { + int acceptedClusters = 0; + for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { + acceptedClusters += clustererNN.outputDataClass[i]; + } + LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << 
iSector << "): filling data " << time_fill << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; + } +#else + GPUFatal("Project not compiled with neural network clusterization. Aborting."); +#endif + } else { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); + } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); if (doGPU && propagateMCLabels) { TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); if (doGPU) { SynchronizeStream(lane); } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); // Computes MC labels } + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSector, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); } diff --git a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h index b4a4752b0f932..cdd489e0ef938 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h +++ b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h @@ -45,6 +45,7 @@ struct ChargePos { GPUdi() tpccf::Row row() const { return gpad / TPC_PADS_PER_ROW_PADDED; } GPUdi() tpccf::Pad pad() const { return gpad % TPC_PADS_PER_ROW_PADDED - 
GPUCF_PADDING_PAD; } GPUdi() tpccf::TPCFragmentTime time() const { return timePadded - GPUCF_PADDING_TIME; } + GPUdi() tpccf::TPCFragmentTime globalTime() const { return timePadded; } private: // Maps the position of a pad given as row and index in that row to a unique diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index f0c6ac47f3c8a..90d977372b201 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -41,6 +41,17 @@ class ClusterAccumulator GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2); GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2); + GPUd() void setFull(float qtot, float padMean, float padSigma, float timeMean, float timeSigma, uint8_t splitInPad, uint8_t splitInTime) + { + mQtot = qtot; + mPadMean = padMean; + mPadSigma = padSigma; + mTimeMean = timeMean; + mTimeSigma = timeSigma; + mSplitInPad = splitInPad; + mSplitInTime = splitInTime; + } + GPUd() void finalize(const ChargePos&, const tpccf::Charge, tpccf::TPCTime); GPUd() bool toNative(const ChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const Array2D&); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx new file mode 100644 index 0000000000000..6a9b6f546ae07 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx @@ -0,0 +1,55 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUTPCNNClusterizer.cxx +/// \author Christian Sonnabend + +#include "GPUReconstruction.h" +#include "ML/3rdparty/GPUORTFloat16.h" +#include "GPUTPCNNClusterizer.h" + +using namespace o2::gpu; + +void GPUTPCNNClusterizer::InitializeProcessor() {} + +void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {} + +void* GPUTPCNNClusterizer::setIOPointers(void* mem) +{ + if (nnClusterizerDtype == 0 && nnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, inputData16, nnClusterizerBatchedMode * nnClusterizerElementSize); + } else if (nnClusterizerDtype == 1 && nnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, inputData32, nnClusterizerBatchedMode * nnClusterizerElementSize); + } + computePointerWithAlignment(mem, peakPositions, nnClusterizerBatchedMode); + computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode); + computePointerWithAlignment(mem, centralCharges, nnClusterizerBatchedMode); + computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters); + if (nnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, modelProbabilities, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + } + if (!nnClusterizerUseCfRegression) { + if (nnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg1, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + } + if (nnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg2, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + } + } + return mem; +} + +void GPUTPCNNClusterizer::RegisterMemoryAllocation() +{ + AllocateAndInitializeLate(); + int32_t memType = GPUMemoryResource::MEMORY_SCRATCH | 
GPUMemoryResource::MEMORY_STACK; + mMemoryId = mRec->RegisterMemoryAllocation(this, &GPUTPCNNClusterizer::setIOPointers, memType, "TPCNNClusterer", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::NNClusterer, (uint16_t)(mISector % mRec->GetProcessingSettings().nTPCClustererLanes)}); +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h new file mode 100644 index 0000000000000..ea6340dfd48bc --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h @@ -0,0 +1,77 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCNNClusterizer.h +/// \author Christian Sonnabend + +#ifndef O2_GPUTPCNNCLUSTERIZER_H +#define O2_GPUTPCNNCLUSTERIZER_H + +#include "ChargePos.h" +#include "GPUProcessor.h" + +namespace o2::OrtDataType +{ +struct Float16_t; +} + +namespace o2::gpu +{ + +class GPUTPCNNClusterizer : public GPUProcessor +{ + public: + GPUTPCNNClusterizer() = default; + void* setIOPointers(void*); + void RegisterMemoryAllocation(); + void InitializeProcessor(); + void SetMaxData(const GPUTrackingInOutPointers&); + + // Neural network clusterization + + int nnClusterizerSizeInputRow = 3; + int nnClusterizerSizeInputPad = 3; + int nnClusterizerSizeInputTime = 3; + int nnClusterizerElementSize = -1; + bool nnClusterizerAddIndexData = true; + float nnClassThreshold = 0.16; + bool nnSigmoidTrafoClassThreshold = 1; + int nnClusterizerUseCfRegression = 0; + int nnClusterizerBatchedMode = 1; + int nnClusterizerTotalClusters = 1; + int nnClusterizerVerbosity = 0; + int nnClusterizerBoundaryFillValue = -1; + int nnClusterizerDumpDigits = 0; + int nnClusterizerApplyCfDeconvolution = 0; + int nnClusterizerModelClassNumOutputNodes = -1; + int nnClusterizerModelReg1NumOutputNodes = -1; + int nnClusterizerModelReg2NumOutputNodes = -1; + int nnClusterizerDtype = 0; // 0: float16, 1: float32 + int mISector = -1; + + // Memory allocation for neural network + uint class2_elements = 0; + float* inputData32 = nullptr; + OrtDataType::Float16_t* inputData16 = nullptr; + float* outputDataClass = nullptr; + float* modelProbabilities = nullptr; + float* outputDataReg1 = nullptr; + float* outputDataReg2 = nullptr; + + ChargePos* peakPositions = nullptr; + bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. 
Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptrx + float* centralCharges = nullptr; + int16_t mMemoryId = -1; +}; // class GPUTPCNNClusterizer + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx new file mode 100644 index 0000000000000..5002c63524020 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -0,0 +1,65 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCNNClusterizerHost.cxx +/// \author Christian Sonnabend + +#include "GPUTPCNNClusterizerHost.h" +#include "GPUTPCNNClusterizer.h" +#include "GPUSettings.h" +#include "ML/3rdparty/GPUORTFloat16.h" + +using namespace o2::gpu; + +GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clusterer) +{ + OrtOptions = { + {"model-path", settings.nnClassificationPath}, + {"device", settings.nnInferenceDevice}, + {"device-id", std::to_string(settings.nnInferenceDeviceId)}, + {"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)}, + {"dtype", settings.nnInferenceDtype}, + {"intra-op-num-threads", std::to_string(settings.nnInferenceIntraOpNumThreads)}, + {"inter-op-num-threads", std::to_string(settings.nnInferenceInterOpNumThreads)}, + {"enable-optimizations", std::to_string(settings.nnInferenceEnableOrtOptimization)}, + {"enable-profiling", std::to_string(settings.nnInferenceOrtProfiling)}, + {"profiling-output-path", settings.nnInferenceOrtProfilingPath}, + {"logging-level", std::to_string(settings.nnInferenceVerbosity)}}; + + model_class.init(OrtOptions); + clusterer.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1]; + + reg_model_paths = splitString(settings.nnRegressionPath, ":"); + + if (!settings.nnClusterizerUseCfRegression) { + if (model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) { + OrtOptions["model-path"] = reg_model_paths[0]; + model_reg_1.init(OrtOptions); + clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + } else { + OrtOptions["model-path"] = reg_model_paths[0]; + model_reg_1.init(OrtOptions); + clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + OrtOptions["model-path"] = reg_model_paths[1]; + model_reg_2.init(OrtOptions); + clusterer.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1]; + } + } +} + 
+void GPUTPCNNClusterizerHost::networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype) +{ + if (dtype == 0) { + model.inference(clusterer.inputData16, size * clusterer.nnClusterizerElementSize, output); + } else { + model.inference(clusterer.inputData32, size * clusterer.nnClusterizerElementSize, output); + } +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h new file mode 100644 index 0000000000000..7efa0edecb893 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -0,0 +1,68 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCNNClusterizerHost.h +/// \author Christian Sonnabend + +#ifndef O2_GPUTPCNNCLUSTERIZERHOST_H +#define O2_GPUTPCNNCLUSTERIZERHOST_H + +#include +#include +#include +#include "ML/OrtInterface.h" + +using namespace o2::ml; + +namespace o2::OrtDataType +{ +struct Float16_t; +} + +namespace o2::gpu +{ + +class GPUTPCNNClusterizer; +struct GPUSettingsProcessingNNclusterizer; + +class GPUTPCNNClusterizerHost +{ + public: + GPUTPCNNClusterizerHost() = default; + GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); + + void networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype); + + std::unordered_map OrtOptions; + o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters + std::vector reg_model_paths; + + private: + // Avoid including CommonUtils/StringUtils.h + std::vector splitString(const std::string& input, const std::string& delimiter) + { + std::vector tokens; + std::size_t pos = 0; + std::size_t found; + + while ((found = input.find(delimiter, pos)) != std::string::npos) { + tokens.push_back(input.substr(pos, found - pos)); + pos = found + delimiter.length(); + } + tokens.push_back(input.substr(pos)); + + return tokens; + } +}; // class GPUTPCNNClusterizerHost + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx new file mode 100644 index 0000000000000..25cd2497fbf62 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -0,0 +1,386 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUTPCNNClusterizerKernels.cxx +/// \author Christian Sonnabend + +#include "GPUTPCNNClusterizerKernels.h" +#include "GPUTPCCFClusterizer.h" + +using namespace o2::gpu; +using namespace o2::gpu::tpccf; + +#include "CfConsts.h" +#include "CfUtils.h" +#include "ClusterAccumulator.h" +#include "ML/3rdparty/GPUORTFloat16.h" + +#if !defined(GPUCA_GPUCODE) +#include "GPUHostDataTypes.h" +#include "MCLabelAccumulator.h" +#endif + +#ifdef GPUCA_GPUCODE +#include "GPUTPCCFClusterizer.inc" +#endif + +// Defining individual thread functions for data filling, determining the class label and running the CF clusterizer +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + auto& clusterer = processors.tpcClusterer[sector]; + auto& clustererNN = processors.tpcNNClusterer[sector]; + if (clustererNN.outputDataClass[glo_idx] == 0) { // default clusterizer should not be called in batched mode due to mess-up with thread indices + return; + } + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CPU_ONLY(MCLabelAccumulator labelAcc(clusterer)); + tpc::ClusterNative* clusterOut = (onlyMC) ? 
nullptr : clusterer.mPclusterByRow; + o2::gpu::GPUTPCCFClusterizer::GPUSharedMemory smem_new; + GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem_new, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow); +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + GPUTPCNNClusterizerKernels::fillInputData(nBlocks, nThreads, iBlock, iThread, processors, sector, dtype, batchStart); +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold); +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + auto& clusterer = processors.tpcNNClusterer[sector]; + uint glo_idx = get_global_id(0); + uint elem_iterator = glo_idx * clusterer.nnClusterizerModelClassNumOutputNodes; + float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty] + uint class_label = 0; + for (int pIdx = elem_iterator; pIdx < elem_iterator + 
clusterer.nnClusterizerModelClassNumOutputNodes; pIdx++) { + if (pIdx == elem_iterator) { + current_max_prob = clusterer.modelProbabilities[pIdx]; + } else { + class_label = (clusterer.modelProbabilities[pIdx] > current_max_prob ? pIdx : class_label); + } + } + // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clusterer.nnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins" + clusterer.outputDataClass[glo_idx + batchStart] = class_label; +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + if (glo_idx >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { + return; + } + GPUTPCNNClusterizerKernels::publishClustersReg1(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart); +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + if (glo_idx >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { + return; + } + GPUTPCNNClusterizerKernels::publishClustersReg2(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart); +} + +// THe following arithmetic is done because the network is trained with a split between IROC and OROC boundary +GPUd() int GPUTPCNNClusterizerKernels::padOffset(int row_ref, int row_current, const GPUTPCGeometry& geo) +{ + return (int)((geo.NPads(row_current) - geo.NPads(row_ref)) / 2); +} + +GPUd() int GPUTPCNNClusterizerKernels::rowOffset(int row, int global_shift) +{ + return (row > 62 ? 
global_shift : 0); +} + +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift, const GPUTPCGeometry& geo) +{ + if (pad < 0 || row < 0) { // Faster short-circuit + return true; + } else if (row < 63) { + return (pad >= static_cast(geo.NPads(row))); + } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network + return true; + } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) { + return (pad >= static_cast(geo.NPads(row - global_shift))); + } else { + return true; + } +} + +// Filling the input data for the neural network where there is no boundary +GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, processorType& processors, uint8_t sector, int8_t dtype, uint batchStart) +{ + uint glo_idx = get_global_id(0); + auto& clusterer = processors.tpcClusterer[sector]; + auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + Array2D isPeakMap(clusterer.mPpeakMap); + + uint write_idx = glo_idx * clustererNN.nnClusterizerElementSize; // Potential optimization: Either choose nnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId + + ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors + float central_charge = static_cast(chargeMap[peak].unpack()); + + clustererNN.peakPositions[glo_idx] = peak; + clustererNN.centralCharges[glo_idx] = central_charge; + clustererNN.outputDataClass[glo_idx + batchStart] = -1; + + int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); +#ifndef GPUCA_GPUCODE + GPUCA_UNROLL(U(), U()); +#endif + for (int r = 
-clustererNN.nnClusterizerSizeInputRow; r <= clustererNN.nnClusterizerSizeInputRow; r++) { + bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); + int pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r, clusterer.Param().tpcGeometry); + for (int p = -clustererNN.nnClusterizerSizeInputPad + pad_offset; p <= clustererNN.nnClusterizerSizeInputPad + pad_offset; p++) { + bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow, clusterer.Param().tpcGeometry); + for (int t = -clustererNN.nnClusterizerSizeInputTime; t <= clustererNN.nnClusterizerSizeInputTime; t++) { + if (!is_boundary) { + ChargePos tmp_pos(row + r, pad + p, time + t); + if (r == 0 && !clustererNN.clusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization + clustererNN.clusterFlags[2 * glo_idx] = CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; + } + if (dtype == 0) { + clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + } else { + clustererNN.inputData32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + } + } else { + // Filling boundary just to make sure that no values are left unintentionally + if (dtype == 0) { + clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + } else { + clustererNN.inputData32[write_idx] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + } + } + write_idx++; + } + } + } + if (clustererNN.nnClusterizerAddIndexData) { + if (dtype == 0) { + clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(clusterer.mISector / 36.f); + clustererNN.inputData16[write_idx + 1] = 
(OrtDataType::Float16_t)(row / 152.f); + clustererNN.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / clusterer.Param().tpcGeometry.NPads(row)); + } else { + clustererNN.inputData32[write_idx] = clusterer.mISector / 36.f; + clustererNN.inputData32[write_idx + 1] = row / 152.f; + clustererNN.inputData32[write_idx + 2] = static_cast(pad) / clusterer.Param().tpcGeometry.NPads(row); + } + } +} + +GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + auto& clusterer = processors.tpcClusterer[sector]; + auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); + MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); + tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + uint full_glo_idx = glo_idx + batchStart; + int model_output_index = glo_idx * clustererNN.nnClusterizerModelReg1NumOutputNodes; + + // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.nnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size(); + + if (clustererNN.outputDataClass[full_glo_idx] == 1) { + + ClusterAccumulator pc; + + // Publishing logic is taken from default clusterizer + if (onlyMC) { + ClusterAccumulator dummy_pc; + CPU_ONLY(labelAcc->collect(clustererNN.peakPositions[glo_idx], chargeMap[clustererNN.peakPositions[glo_idx]].unpack())); + GPUTPCCFClusterizer::buildCluster( + clusterer.Param().rec, + chargeMap, + clustererNN.peakPositions[glo_idx], + smem.posBcast, + smem.buf, + smem.innerAboveThreshold, + &dummy_pc, + labelAcc); + } + + if ((clusterer.mPmemory->fragment).isOverlap(clustererNN.peakPositions[glo_idx].time())) { + if 
(clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg1[model_output_index + 4], + static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg1[model_output_index], + clustererNN.outputDataReg1[model_output_index + 2], + (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg1[model_output_index + 1], + clustererNN.outputDataReg1[model_output_index + 3], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + + tpc::ClusterNative myCluster; + bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + if (rejectCluster) { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + uint rowIndex = 0; + if (clusterer.mPclusterByRow != nullptr) { + rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( + clusterer, + myCluster, + clustererNN.peakPositions[glo_idx].row(), + clusterer.mNMaxClusterPerRow, + clusterer.mPclusterInRow, + clusterOut); + if (clusterer.mPclusterPosInRow != nullptr) { + clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex; + } + } else if (clusterer.mPclusterPosInRow) { + rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; + } + CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); + } else { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } +} + +GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + auto& clusterer = processors.tpcClusterer[sector]; 
+ auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); + MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); + tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + uint full_glo_idx = glo_idx + batchStart; + int model_output_index = glo_idx * clustererNN.nnClusterizerModelReg2NumOutputNodes; + + // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.nnClusterizerModelReg2NumOutputNodes << " -- " << clustererNN.peakPositions.size() << " -- " << clustererNN.centralCharges.size(); + + if (clustererNN.outputDataClass[full_glo_idx] > 0) { + + ClusterAccumulator pc; + + if (onlyMC) { + ClusterAccumulator dummy_pc; + CPU_ONLY(labelAcc->collect(clustererNN.peakPositions[glo_idx], chargeMap[clustererNN.peakPositions[glo_idx]].unpack())); + GPUTPCCFClusterizer::buildCluster( + clusterer.Param().rec, + chargeMap, + clustererNN.peakPositions[glo_idx], + smem.posBcast, + smem.buf, + smem.innerAboveThreshold, + &dummy_pc, + labelAcc); + } + + if ((clusterer.mPmemory->fragment).isOverlap(clustererNN.peakPositions[glo_idx].time())) { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + // Cluster 1 + pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 8], + static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index], + clustererNN.outputDataReg2[model_output_index + 4], + (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 2], + clustererNN.outputDataReg2[model_output_index + 6], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + + tpc::ClusterNative myCluster; + bool 
rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + if (rejectCluster) { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + uint rowIndex = 0; + if (clusterer.mPclusterByRow != nullptr) { + rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( + clusterer, + myCluster, + clustererNN.peakPositions[glo_idx].row(), + clusterer.mNMaxClusterPerRow, + clusterer.mPclusterInRow, + clusterOut); + if (clusterer.mPclusterPosInRow != nullptr) { + clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex; + } + } else if (clusterer.mPclusterPosInRow) { + rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; + } + CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); + + // Cluster 2 + pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 9], + static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index + 1], + clustererNN.outputDataReg2[model_output_index + 5], + (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 3], + clustererNN.outputDataReg2[model_output_index + 7], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + + rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + if (rejectCluster) { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + if (clusterer.mPclusterByRow != nullptr) { + rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( + clusterer, + myCluster, + clustererNN.peakPositions[glo_idx].row(), + clusterer.mNMaxClusterPerRow, + clusterer.mPclusterInRow, + 
clusterOut); + if (clusterer.mPclusterPosInRow != nullptr) { + clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex; + } + } else if (clusterer.mPclusterPosInRow) { + rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; + } + // CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); // -> Is this needed? How to handle MC labels for split clusters? + } else { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h new file mode 100644 index 0000000000000..c7bd18115d61f --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -0,0 +1,77 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCNNClusterizerKernels.h +/// \author Christian Sonnabend + +#ifndef O2_GPU_NN_CLUSTERIZER_H +#define O2_GPU_NN_CLUSTERIZER_H + +#include "clusterFinderDefs.h" +#include "GPUGeneralKernels.h" +#include "GPUConstantMem.h" +#include "GPUTPCClusterFinder.h" +#include "Array2D.h" +#include "PackedCharge.h" +#include "GPUTPCNNClusterizer.h" + +namespace o2::tpc +{ +struct ClusterNative; +} // namespace o2::tpc + +namespace o2::gpu +{ + +class ClusterAccumulator; +class MCLabelAccumulator; + +class GPUTPCNNClusterizerKernels : public GPUKernelTemplate +{ + public: + static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels); + struct GPUSharedMemory { + // Regular cluster finder + ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_BUILD_N]; + uint8_t innerAboveThreshold[SCRATCH_PAD_WORK_GROUP_SIZE]; + }; + + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() + { + return GPUDataTypes::RecoStep::TPCClusterFinding; + } + + enum K : int32_t { + runCfClusterizer = 0, + fillInputNN = 1, + determineClass1Labels = 2, + determineClass2Labels = 3, + publishClass1Regression = 4, + publishClass2Regression = 5, + }; + + template + GPUd() static void Thread(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, processorType&, uint8_t = 0, int8_t = 0, int8_t = 0, uint = 0, Args...); + + private: + static GPUd() void fillInputData(int32_t, int32_t, int32_t, int32_t, processorType&, uint8_t, int8_t, uint); + static GPUd() void publishClustersReg1(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); + static GPUd() void publishClustersReg2(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); + + static GPUd() int padOffset(int, int, const GPUTPCGeometry&); + static GPUd() int rowOffset(int, int); + static GPUd() bool isBoundary(int, int, int, const GPUTPCGeometry&); +}; + +} // namespace o2::gpu + +#endif diff 
--git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 4b7aab75519fa..ad348a84264f0 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -24,6 +24,9 @@ o2_gpu_kernel_file_list(O2PROPAGATOR TrackParametrization.cxx TrackParametrizati o2_gpu_kernel_file_list(TPCCOMPRESSION GPUTPCCompressionTrackModel.cxx) o2_gpu_kernel_file_list(TPCDECOMPRESSION GPUTPCCompressionTrackModel.cxx ERRORS) o2_gpu_kernel_file_list(TPCCLUSTERFINDER ERRORS ClusterAccumulator.cxx) +if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") +o2_gpu_kernel_file_list(TPCNNCLUSTERFINDER ERRORS ClusterAccumulator.cxx GPUTPCNNClusterizerKernels.cxx) +endif() o2_gpu_kernel_file_list(TRDTRACKER GPUTRDTrack.cxx GPUTRDTracker.cxx GPUTRDTrackletWord.cxx GeometryBase.cxx) o2_gpu_kernel_file_list(GLOBALREFIT TPCMERGER O2PROPAGATOR MATLUT GPUTrackingRefit.cxx) @@ -111,7 +114,15 @@ o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "= TPCCLUS o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB) o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB) o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB int8_t onlyMC) -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") +if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, 
publishClass1Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +endif() +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO GPUTPCLinearLabels* out) o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage) o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) From 911f7dce4f1a36bf7031959822253ccb39f4b02a Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sat, 15 Mar 2025 19:35:20 +0100 Subject: [PATCH 0065/1764] DPL: account for IO time correctly (#14064) --- .../AnalysisSupport/src/DataInputDirector.cxx | 56 +++++++++++++------ 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index 2c51360cd9923..cfb5ca34b062b 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -387,18 +387,53 @@ int DataInputDescriptor::findDFNumber(int file, std::string dfName) return it - dfList.begin(); } +struct CalculateDelta { + CalculateDelta(uint64_t& target) + : mTarget(target) + { + start = uv_hrtime(); + } + ~CalculateDelta() + { + if (!active) { + return; + } + O2_SIGNPOST_ACTION(reader_memory_dump, [](void*) { + void (*dump_)(const char*); + if (void* sym = dlsym(nullptr, "igprof_dump_now")) { + dump_ = __extension__(void (*)(const char*)) sym; + if (dump_) { + std::string filename = fmt::format("reader-memory-dump-{}.gz", uv_hrtime()); + dump_(filename.c_str()); + } + } + }); + mTarget += (uv_hrtime() - start); + } + + void deactivate() { + 
active = false; + } + + bool active = true; + uint64_t& mTarget; + uint64_t start; + uint64_t stop; +}; + bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t& totalSizeCompressed, size_t& totalSizeUncompressed) { - auto ioStart = uv_hrtime(); - + CalculateDelta t(mIOTime); auto folder = getFileFolder(counter, numTF); if (!folder.filesystem()) { + t.deactivate(); return false; } auto rootFS = std::dynamic_pointer_cast(folder.filesystem()); if (!rootFS) { + t.deactivate(); throw std::runtime_error(fmt::format(R"(Not a TFile filesystem!)")); } // FIXME: Ugly. We should detect the format from the treename, good enough for now. @@ -420,6 +455,7 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh // FIXME: we should distinguish between an actually missing object and one which has a non compatible // format. if (!format) { + t.deactivate(); LOGP(debug, "Could not find tree {}. Trying in parent file.", fullpath.path()); auto parentFile = getParentFile(counter, numTF, treename); if (parentFile != nullptr) { @@ -460,19 +496,6 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh f2b->setLabel(treename.c_str()); f2b->fill(datasetSchema, format); - mIOTime += (uv_hrtime() - ioStart); - - O2_SIGNPOST_ACTION(reader_memory_dump, [](void*) { - void (*dump_)(const char*); - if (void* sym = dlsym(nullptr, "igprof_dump_now")) { - dump_ = __extension__(void (*)(const char*)) sym; - if (dump_) { - std::string filename = fmt::format("reader-memory-dump-{}.gz", uv_hrtime()); - dump_(filename.c_str()); - } - } - }); - return true; } @@ -820,7 +843,8 @@ bool DataInputDirector::readTree(DataAllocator& outputs, header::DataHeader dh, treename = aod::datamodel::getTreeName(dh); } - return didesc->readTree(outputs, dh, counter, numTF, treename, totalSizeCompressed, totalSizeUncompressed); + auto result = didesc->readTree(outputs, dh, counter, numTF, 
treename, totalSizeCompressed, totalSizeUncompressed); + return result; } void DataInputDirector::closeInputFiles() From 360be36ab9c948f133b745fbde7e6a822b47e8df Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 15 Mar 2025 20:03:50 +0100 Subject: [PATCH 0066/1764] GPU TPC CF: Add option to select 1pad or 2pad edge flag definition (#14067) --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + .../TPCClusterFinder/ClusterAccumulator.cxx | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 63fcf51004eae..b7f761c73ffc0 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -128,6 +128,7 @@ AddOptionRTC(cfInnerThreshold, uint8_t, 0, "", 0, "Cluster Finder extends cluste AddOptionRTC(cfMinSplitNum, uint8_t, 1, "", 0, "Minimum number of split charges in a cluster for the cluster to be marked as split") AddOptionRTC(cfNoiseSuppressionEpsilon, uint8_t, 10, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression") AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression, relative as fraction of 255") +AddOptionRTC(cfEdgeTwoPads, uint8_t, 1, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster") AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger") AddOptionRTC(nWaysOuter, int8_t, 0, "", 0, "Store outer param") AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits") diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index b6792ce3a9ef5..b3b3c64095017 100644 --- 
a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -79,14 +79,19 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, t { Pad pad = pos.pad(); - bool isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge - if (isEdgeCluster) { - bool leftEdge = (pad < 2); - if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { - isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge. - } else if (leftEdge ? (pad < mPadMean) : (pad > mPadMean)) { - mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak + bool isEdgeCluster; + if (param.rec.tpc.cfEdgeTwoPads) { + isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge + if (isEdgeCluster) { + bool leftEdge = (pad < 2); + if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { + isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge. + } else if (leftEdge ? 
(pad < mPadMean) : (pad > mPadMean)) { + mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak + } } + } else { + isEdgeCluster = pad == 0 || pad == param.tpcGeometry.NPads(pos.row()) - 1; } cn.qTot = CAMath::Float2UIntRn(mQtot); From b0776cad0fb5c1598bbe80628ff3b2ae492dc2aa Mon Sep 17 00:00:00 2001 From: pillot Date: Sat, 15 Mar 2025 20:11:06 +0100 Subject: [PATCH 0067/1764] [MCH] optional setting of CCDB file time window (#14059) --- Detectors/MUON/MCH/Conditions/README.md | 2 + .../MCH/Conditions/src/scan-hvlv-ccdb.cxx | 54 +++++++++++-------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/Detectors/MUON/MCH/Conditions/README.md b/Detectors/MUON/MCH/Conditions/README.md index d35fdcd0a0958..21892a7478d86 100644 --- a/Detectors/MUON/MCH/Conditions/README.md +++ b/Detectors/MUON/MCH/Conditions/README.md @@ -73,6 +73,8 @@ Usage: change HV thresholds -d [ --duration ] arg (=0) minimum duration (ms) of HV/LV issues to consider + -i [ --interval ] arg (=30) creation time interval (minutes) between + CCDB files -w [ --warning ] arg (=1) warning level (0, 1 or 2) -p [ --print ] arg (=1) print level (0, 1, 2 or 3) -o [ --output ] arg (=scan.root) output root file name diff --git a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx index 32cd365916c63..307759c97a0c3 100644 --- a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx @@ -153,6 +153,20 @@ std::string getTime(uint64_t ts) return time; } +//---------------------------------------------------------------------------- +std::string getDuration(uint64_t tStart, uint64_t tStop) +{ + /// get the duration (dd hh:mm:ss) between the two time stamps (ms) + + auto dt = ms2s(tStop - tStart); + auto s = dt % 60; + auto m = (dt / 60) % 60; + auto h = (dt / 3600) % 24; + auto d = dt / 86400; + + return fmt::format("{:02}d {:02}:{:02}:{:02}", d, h, m, s); +} + 
//---------------------------------------------------------------------------- std::set getRuns(std::string runList) { @@ -283,15 +297,17 @@ void drawRunBoudaries(const RBMAP& runBoundaries, TCanvas* c) } //---------------------------------------------------------------------------- -DPBMAP getDPBoundaries(ccdb::CcdbApi const& api, std::string what, uint64_t tStart, uint64_t tStop) +DPBMAP getDPBoundaries(ccdb::CcdbApi const& api, std::string what, + uint64_t tStart, uint64_t tStop, uint64_t timeInterval) { /// get the time boundaries of every HV/LV files found in the time range - // add extra margin (ms) of ± 1 min to the creation time, which occurs every 30 min - static const uint64_t timeMarging[2] = {60000, 1860000}; + // add an extra margin (ms) of ± 1 min to the creation time, + // which corresponds to the end of the time interval covered by the file + static const uint64_t timeMarging = 60000; std::istringstream fileInfo(api.list(what.c_str(), false, "text/plain", - tStop + timeMarging[1], tStart - timeMarging[0])); + tStop + timeInterval + timeMarging, tStart - timeMarging)); DPBMAP dpBoundaries{}; std::string dummy{}; @@ -357,7 +373,7 @@ void checkDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t tStart, } //---------------------------------------------------------------------------- -void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV) +void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t timeInterval) { /// print the time boundaries of every HV/LV files found in the full time range @@ -365,7 +381,13 @@ void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV) printf("------------------------------------\n"); for (auto [tStart, tStop] : dpBoundaries) { - printf("%llu - %llu (%s - %s)\n", tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + printf("%llu - %llu (%s - %s)", tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + if (tStop - tStart < 60000 * (timeInterval - 1) || tStop 
- tStart > 60000 * (timeInterval + 1)) { + printf("\e[0;31m ! warning: validity range %s != %llu±1 min\e[0m\n", + getDuration(tStart, tStop).c_str(), timeInterval); + } else { + printf("\n"); + } } printf("------------------------------------\n"); @@ -400,20 +422,6 @@ void drawLimit(double limit, TCanvas* c) l->Draw(); } -//---------------------------------------------------------------------------- -std::string getDuration(uint64_t tStart, uint64_t tStop) -{ - /// get the duration (dd hh:mm:ss) between the two time stamps (ms) - - auto dt = ms2s(tStop - tStart); - auto s = dt % 60; - auto m = (dt / 60) % 60; - auto h = (dt / 3600) % 24; - auto d = dt / 86400; - - return fmt::format("{:02}d {:02}:{:02}:{:02}", d, h, m, s); -} - //---------------------------------------------------------------------------- double getValue(DPVAL dp) { @@ -943,6 +951,7 @@ int main(int argc, char** argv) std::string what = ""; std::string config = ""; uint64_t minDuration = 0; + uint64_t timeInterval = 30; int warningLevel = 1; int printLevel = 1; std::string outFileName = ""; @@ -955,6 +964,7 @@ int main(int argc, char** argv) ("channels,c",po::value(&what)->default_value(""),R"(channel(s) to scan ("HV" or "LV" or comma separated list of (part of) DCS aliases))") ("configKeyValues",po::value(&config)->default_value(""),"Semicolon separated key=value strings to change HV thresholds") ("duration,d",po::value(&minDuration)->default_value(0),"minimum duration (ms) of HV/LV issues to consider") + ("interval,i",po::value(&timeInterval)->default_value(30),"creation time interval (minutes) between CCDB files") ("warning,w",po::value(&warningLevel)->default_value(1),"warning level (0, 1 or 2)") ("print,p",po::value(&printLevel)->default_value(1),"print level (0, 1, 2 or 3)") ("output,o",po::value(&outFileName)->default_value("scan.root"),"output root file name") @@ -1021,9 +1031,9 @@ int main(int argc, char** argv) // extract the time boundaries for each HV/LV file in the full time range auto 
dpBoundaries = getDPBoundaries(api, path.c_str(), runBoundaries.begin()->second.first, - runBoundaries.rbegin()->second.second); + runBoundaries.rbegin()->second.second, timeInterval * 60000); if (printLevel > 0) { - printDPBoundaries(dpBoundaries, scanHV); + printDPBoundaries(dpBoundaries, scanHV, timeInterval); } checkDPBoundaries(dpBoundaries, scanHV, runBoundaries.begin()->second.first, runBoundaries.rbegin()->second.second); From 4de0c6c5b3c7a259c182c62666cf8f211277d009 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 18:12:30 +0100 Subject: [PATCH 0068/1764] GPU: Provide general GPUFailedMsg functionality also externally --- GPU/Common/CMakeLists.txt | 1 + GPU/Common/GPUCommonChkErr.h | 21 +++++++++++++++++++ GPU/Common/GPUCommonDefAPI.h | 2 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 15 +++++++++++++ GPU/GPUTracking/Base/GPUReconstruction.h | 3 +++ .../Base/GPUReconstructionDeviceBase.cxx | 6 +++++- .../Base/GPUReconstructionDeviceBase.h | 1 + .../Base/cuda/GPUReconstructionCUDA.cu | 16 ++------------ .../Base/cuda/GPUReconstructionCUDA.h | 4 ++-- .../GPUReconstructionCUDAExternalProvider.cu | 2 +- .../cuda/GPUReconstructionCUDAInternals.h | 4 +--- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 4 +++- .../Base/opencl/GPUReconstructionOCL.cxx | 17 +++------------ .../Base/opencl/GPUReconstructionOCL.h | 3 +-- .../opencl/GPUReconstructionOCLIncludesHost.h | 4 +--- 15 files changed, 61 insertions(+), 42 deletions(-) create mode 100644 GPU/Common/GPUCommonChkErr.h diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 8466035d74ef7..8b0a75679479f 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -15,6 +15,7 @@ set(HDRS_INSTALL GPUCommonAlgorithm.h GPUCommonDef.h GPUCommonDefAPI.h + GPUCommonChkErr.h GPUCommonDefSettings.h GPUCommonConstants.h GPUCommonLogger.h diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h new file mode 100644 index 0000000000000..df007b31dab64 --- /dev/null 
+++ b/GPU/Common/GPUCommonChkErr.h @@ -0,0 +1,21 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUCommonChkErr.h +/// \author David Rohr + +#ifndef GPUCOMMONCHKERR_H +#define GPUCOMMONCHKERR_H + +#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true) +#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false) + +#endif diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h index 0cd3c4ebddb7f..f7efbf7e976d4 100644 --- a/GPU/Common/GPUCommonDefAPI.h +++ b/GPU/Common/GPUCommonDefAPI.h @@ -36,7 +36,7 @@ #define GPUdni() // Device function, not-to-be-inlined #define GPUdnii() inline // Device function, not-to-be-inlined on device, inlined on host #define GPUh() // Host-only function - // NOTE: All GPUd*() functions are also compiled on the host during GCC compilation. + // NOTE: All GPUd*() functions are also compiled on the host during host compilation. // The GPUh*() macros are for the rare cases of functions that you want to compile for the host during GPU compilation. // Usually, you do not need the GPUh*() versions. If in doubt, use GPUd*()! 
#define GPUhi() inline // to-be-inlined host-only function diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index df9a7380834ce..28241cb7aeec5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -1078,6 +1078,21 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s return retVal; } +int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError) +{ + if (error == 0 || !GPUFailedMsgInternal(error, file, line)) { + return 0; + } + if (failOnError) { + if (mInitialized && mInErrorHandling == false) { + mInErrorHandling = true; + CheckErrorCodes(false, true); + } + throw std::runtime_error("GPU Backend Failure"); + } + return 1; +} + void GPUReconstruction::DumpSettings(const char* dir) { std::string f; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 93310284d7564..1fe08d08a8058 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -143,6 +143,7 @@ class GPUReconstruction virtual void* getGPUPointer(void* ptr) { return ptr; } virtual void startGPUProfiling() {} virtual void endGPUProfiling() {} + int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError); int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr); void RunPipelineWorker(); void TerminatePipelineWorker(); @@ -246,6 +247,7 @@ class GPUReconstruction void UpdateMaxMemoryUsed(); int32_t EnqueuePipeline(bool terminate = false); GPUChain* GetNextChainInQueue(); + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; } virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0; virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; @@ -327,6 +329,7 
@@ class GPUReconstruction // Others bool mInitialized = false; + bool mInErrorHandling = false; uint32_t mStatNEvents = 0; uint32_t mNEventsProcessed = 0; double mStatKernelTime = 0.; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index d1091f59b784a..b389e99a0b2bb 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -175,7 +175,11 @@ void GPUReconstructionDeviceBase::runConstantRegistrators() { auto& list = getDeviceConstantMemRegistratorsVector(); for (uint32_t i = 0; i < list.size(); i++) { - mDeviceConstantMemList.emplace_back(list[i]()); + auto* ptr = list[i](); + if (ptr == nullptr) { + GPUFatal("Error registering constant memory"); + } + mDeviceConstantMemList.emplace_back(ptr); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 6cd3813ff1431..c4595bed4c3fb 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -46,6 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU virtual int32_t InitDevice_Runtime() = 0; int32_t ExitDevice() override; virtual int32_t ExitDevice_Runtime() = 0; + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0; int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override; int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d2adc3cc1fd19..40e3fa9b90eae 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -61,9 +61,9 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } } -int32_t 
GPUReconstructionCUDABackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); +int32_t GPUReconstructionCUDABackend::GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line) { - // Check for CUDA Error and in the case of an error display the corresponding error string if (error == cudaSuccess) { return (0); } @@ -71,18 +71,6 @@ int32_t GPUReconstructionCUDABackend::GPUFailedMsgAI(const int64_t error, const return 1; } -void GPUReconstructionCUDABackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) -{ - if (GPUFailedMsgAI(error, file, line)) { - static bool runningCallbacks = false; - if (IsInitialized() && runningCallbacks == false) { - runningCallbacks = true; - CheckErrorCodes(false, true); - } - throw std::runtime_error("CUDA Failure"); - } -} - GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) { mDeviceBackendSettings.deviceType = DeviceType::CUDA; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index dde70b9076e08..e04e14bd383d3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUFailedMsgAI(const int64_t error, const char* file, int32_t line); - void GPUFailedMsgA(const int64_t error, const char* file, int32_t line); + static int32_t GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); 
} template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 6bcafe565e930..521ca2182c9bb 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -36,7 +36,7 @@ using namespace o2::gpu; #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer) != cudaSuccess) { + if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 49c466103c593..a6d55c2d729fd 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -22,13 +22,11 @@ #include #include #include +#include "GPUCommonChkErr.h" namespace o2::gpu { -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - struct GPUReconstructionCUDAInternals { std::vector> kernelModules; // module for RTC compilation std::vector> kernelFunctions; // vector of ptrs to RTC kernels diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 0f8d9bf219ba6..a5ab353f3d43f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -138,7 +138,9 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY 
static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - GPUReconstructionCUDA::GPUFailedMsgI(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer)); + if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + throw std::runtime_error("Could not obtain GPU constant memory symbol"); + } return retVal; }); #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index c7a8be62a12ea..6639c78b113e5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -48,28 +48,17 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } } -int32_t GPUReconstructionOCLBackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); +int32_t GPUReconstructionOCLBackend::GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { return (0); } - GPUError("OCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); + GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); return 1; } -void GPUReconstructionOCLBackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) -{ - if (GPUFailedMsgAI(error, file, line)) { - static bool runningCallbacks = false; - if (IsInitialized() && runningCallbacks == false) { - runningCallbacks = true; - CheckErrorCodes(false, true); - } - throw std::runtime_error("OpenCL Failure"); - } -} - void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() { GPUCA_GPUReconstructionUpdateDefaults(); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h 
b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 5132baa444cd9..79f54274cd32c 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -39,8 +39,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; - int32_t GPUFailedMsgAI(const int64_t error, const char* file, int32_t line); - void GPUFailedMsgA(const int64_t error, const char* file, int32_t line); + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override; void SynchronizeGPU() override; int32_t DoStuckProtection(int32_t stream, deviceEvent event) override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index aec5708a80f3c..9c8cdbe87c7c1 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -28,6 +28,7 @@ #include "GPUReconstructionOCL.h" #include "GPUReconstructionIncludes.h" +#include "GPUCommonChkErr.h" using namespace o2::gpu; @@ -36,9 +37,6 @@ using namespace o2::gpu; #include #include -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - namespace o2::gpu { struct GPUReconstructionOCLInternals { From 78933b830a4d46b86b4a1360f2f9a8340962a0c7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 18:13:23 +0100 Subject: [PATCH 0069/1764] GPU: Rename GPUFailedMsg to GPUChkErr --- GPU/Common/GPUCommonChkErr.h | 13 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 4 +- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- .../Base/GPUReconstructionDeviceBase.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 154 +++++++++--------- .../Base/cuda/GPUReconstructionCUDA.h | 4 +- .../GPUReconstructionCUDAExternalProvider.cu | 
2 +- .../cuda/GPUReconstructionCUDAInternals.h | 10 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 12 +- .../Base/opencl/GPUReconstructionOCL.cxx | 72 ++++---- .../Base/opencl/GPUReconstructionOCL.h | 2 +- .../opencl/GPUReconstructionOCLKernels.cxx | 16 +- 12 files changed, 152 insertions(+), 143 deletions(-) diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h index df007b31dab64..00cb9e50d302f 100644 --- a/GPU/Common/GPUCommonChkErr.h +++ b/GPU/Common/GPUCommonChkErr.h @@ -12,10 +12,19 @@ /// \file GPUCommonChkErr.h /// \author David Rohr +// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). +// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code +// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured. +// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. +// The Macros must be called ona GPUReconstruction instance, e.g.: +// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } +// gpuRecObj.GPUChkErr(cudaMalloc(...)); + #ifndef GPUCOMMONCHKERR_H #define GPUCOMMONCHKERR_H -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true) -#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false) +// Please #include "GPUReconstruction.h" in your code, if you use these 2! 
+#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) +#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 28241cb7aeec5..2bd4c0e937c20 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -1078,9 +1078,9 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s return retVal; } -int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError) +int32_t GPUReconstruction::GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError) { - if (error == 0 || !GPUFailedMsgInternal(error, file, line)) { + if (error == 0 || !GPUChkErrInternal(error, file, line)) { return 0; } if (failOnError) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 1fe08d08a8058..f363f3f58aa6f 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -143,7 +143,7 @@ class GPUReconstruction virtual void* getGPUPointer(void* ptr) { return ptr; } virtual void startGPUProfiling() {} virtual void endGPUProfiling() {} - int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError); + int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError); int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr); void RunPipelineWorker(); void TerminatePipelineWorker(); @@ -247,7 +247,7 @@ class GPUReconstruction void UpdateMaxMemoryUsed(); int32_t EnqueuePipeline(bool terminate = false); GPUChain* GetNextChainInQueue(); - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { 
return 0; } virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0; virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index c4595bed4c3fb..f0e19f588e0f1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -46,7 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU virtual int32_t InitDevice_Runtime() = 0; int32_t ExitDevice() override; virtual int32_t ExitDevice_Runtime() = 0; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0; + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override = 0; int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override; int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 40e3fa9b90eae..d30eb51bd4938 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -62,7 +62,7 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line) +int32_t GPUReconstructionCUDABackend::GPUChkErrStatic(const int64_t error, const char* file, int32_t line) { if (error == cudaSuccess) { return (0); @@ -123,7 +123,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() cudaDeviceProp deviceProp; int32_t count, bestDevice = -1; double bestDeviceSpeed = -1, deviceSpeed; - if (GPUFailedMsgI(cudaGetDeviceCount(&count))) { + if 
(GPUChkErrI(cudaGetDeviceCount(&count))) { GPUError("Error getting CUDA Device Count"); return (1); } @@ -139,9 +139,9 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } size_t free, total; #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaInitDevice(i, 0, 0))) { + if (GPUChkErrI(cudaInitDevice(i, 0, 0))) { #else // HIP - if (GPUFailedMsgI(hipSetDevice(i))) { + if (GPUChkErrI(hipSetDevice(i))) { #endif if (mProcessingSettings.debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); @@ -149,21 +149,21 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() continue; } contextCreated = true; - if (GPUFailedMsgI(cudaMemGetInfo(&free, &total))) { + if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { if (mProcessingSettings.debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! Skipping it.", i); } - GPUFailedMsg(cudaDeviceReset()); + GPUChkErr(cudaDeviceReset()); continue; } if (count > 1) { - GPUFailedMsg(cudaDeviceReset()); + GPUChkErr(cudaDeviceReset()); contextCreated = false; } if (mProcessingSettings.debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } - if (GPUFailedMsgI(cudaGetDeviceProperties(&deviceProp, i))) { + if (GPUChkErrI(cudaGetDeviceProperties(&deviceProp, i))) { continue; } if (mProcessingSettings.debugLevel >= 4) { @@ -221,13 +221,13 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } if (noDevice) { if (contextCreated) { - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); } return (1); } mDeviceId = bestDevice; - GPUFailedMsgI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); + GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); if (mProcessingSettings.debugLevel >= 2) { GPUInfo("Using CUDA Device %s with Properties:", deviceProp.name); @@ -280,27 +280,27 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() #endif #ifndef __HIPCC__ // CUDA - if (contextCreated == 0 && GPUFailedMsgI(cudaInitDevice(mDeviceId, 0, 0))) { + if 
(contextCreated == 0 && GPUChkErrI(cudaInitDevice(mDeviceId, 0, 0))) { #else // HIP - if (contextCreated == 0 && GPUFailedMsgI(hipSetDevice(mDeviceId))) { + if (contextCreated == 0 && GPUChkErrI(hipSetDevice(mDeviceId))) { #endif GPUError("Could not set CUDA Device!"); return (1); } #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { GPUError("Error setting CUDA stack size"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } - if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { GPUError("Error setting CUDA stack size"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } #else // HIP - if (GPUFailedMsgI(hipSetDeviceFlags(hipDeviceScheduleBlockingSync))) { + if (GPUChkErrI(hipSetDeviceFlags(hipDeviceScheduleBlockingSync))) { GPUError("Could not set HIP Device flags!"); return (1); } @@ -319,35 +319,35 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Allocating memory on GPU"); } - if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUFailedMsgI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { + if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUChkErrI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { size_t free, total; - GPUFailedMsg(cudaMemGetInfo(&free, &total)); + GPUChkErr(cudaMemGetInfo(&free, &total)); GPUError("CUDA Memory Allocation Error (trying %ld bytes, %ld available on GPU, %ld free)", (int64_t)mDeviceMemorySize, (int64_t)deviceProp.totalGlobalMem, 
(int64_t)free); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Allocating memory on Host"); } - if (GPUFailedMsgI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { + if (GPUChkErrI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { GPUError("Error allocating Page Locked Host Memory (trying %ld bytes)", (int64_t)mHostMemorySize); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } if (mProcessingSettings.debugLevel >= 1) { GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase); memset(mHostMemoryBase, 0xDD, mHostMemorySize); - if (GPUFailedMsgI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { + if (GPUChkErrI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { GPUError("Error during CUDA memset"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } } for (int32_t i = 0; i < mNStreams; i++) { - if (GPUFailedMsgI(cudaStreamCreateWithFlags(&mInternals->Streams[i], cudaStreamNonBlocking))) { + if (GPUChkErrI(cudaStreamCreateWithFlags(&mInternals->Streams[i], cudaStreamNonBlocking))) { GPUError("Error creating CUDA Stream"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } } @@ -365,7 +365,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() else { #define GPUCA_KRNL(x_class, ...) 
\ mInternals->kernelModules.emplace_back(std::make_unique()); \ - GPUFailedMsg(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); + GPUChkErr(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL loadKernelModules(true); @@ -382,11 +382,11 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() CUdeviceptr tmp = nullptr; // HIP just uses void* #endif size_t tmpSize = 0; - GPUFailedMsg(cuModuleGetGlobal(&tmp, &tmpSize, *mInternals->kernelModules[i], "gGPUConstantMemBuffer")); + GPUChkErr(cuModuleGetGlobal(&tmp, &tmpSize, *mInternals->kernelModules[i], "gGPUConstantMemBuffer")); mDeviceConstantMemList.emplace_back((void*)tmp); } #else - GPUFailedMsg(cudaMalloc(&devPtrConstantMem, gGPUConstantMemBufferSize)); + GPUChkErr(cudaMalloc(&devPtrConstantMem, gGPUConstantMemBufferSize)); #endif mDeviceConstantMem = (GPUConstantMem*)devPtrConstantMem; @@ -402,7 +402,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mDeviceConstantMemList.resize(master->mDeviceConstantMemList.size()); std::copy(master->mDeviceConstantMemList.begin(), master->mDeviceConstantMemList.end(), mDeviceConstantMemList.begin()); mInternals = master->mInternals; - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); GPUInfo("CUDA Initialisation successfull (from master)"); } @@ -411,12 +411,12 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data(); for (uint32_t j = 0; j < mEvents[i].size(); j++) { #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaEventCreate(&events[j]))) { + if (GPUChkErrI(cudaEventCreate(&events[j]))) { #else - if (GPUFailedMsgI(hipEventCreateWithFlags(&events[j], 
hipEventBlockingSync))) { + if (GPUChkErrI(hipEventCreateWithFlags(&events[j], hipEventBlockingSync))) { #endif GPUError("Error creating event"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return 1; } } @@ -435,7 +435,7 @@ void GPUReconstructionCUDA::genAndLoadRTC() for (uint32_t i = 0; i < nCompile; i++) { if (mProcessingSettings.rtc.runTest != 2) { mInternals->kernelModules.emplace_back(std::make_unique()); - GPUFailedMsg(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); + GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); @@ -449,33 +449,33 @@ void GPUReconstructionCUDA::genAndLoadRTC() int32_t GPUReconstructionCUDA::ExitDevice_Runtime() { // Uninitialize CUDA - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); SynchronizeGPU(); unregisterRemainingRegisteredMemory(); for (uint32_t i = 0; i < mEvents.size(); i++) { cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data(); for (uint32_t j = 0; j < mEvents[i].size(); j++) { - GPUFailedMsgI(cudaEventDestroy(events[j])); + GPUChkErrI(cudaEventDestroy(events[j])); } } if (mMaster == nullptr) { - GPUFailedMsgI(cudaFree(mDeviceMemoryBase)); + GPUChkErrI(cudaFree(mDeviceMemoryBase)); #ifdef GPUCA_NO_CONSTANT_MEMORY - GPUFailedMsgI(cudaFree(mDeviceConstantMem)); + GPUChkErrI(cudaFree(mDeviceConstantMem)); #endif for (int32_t i = 0; i < mNStreams; i++) { - GPUFailedMsgI(cudaStreamDestroy(mInternals->Streams[i])); + GPUChkErrI(cudaStreamDestroy(mInternals->Streams[i])); } - GPUFailedMsgI(cudaFreeHost(mHostMemoryBase)); + GPUChkErrI(cudaFreeHost(mHostMemoryBase)); for (uint32_t i = 0; i < mInternals->kernelModules.size(); i++) { - 
GPUFailedMsg(cuModuleUnload(*mInternals->kernelModules[i])); + GPUChkErr(cuModuleUnload(*mInternals->kernelModules[i])); } - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); GPUInfo("CUDA Uninitialized"); } mDeviceMemoryBase = nullptr; @@ -491,18 +491,18 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, } if (stream == -1) { SynchronizeGPU(); - GPUFailedMsg(cudaMemcpy(dst, src, size, toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost)); + GPUChkErr(cudaMemcpy(dst, src, size, toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost)); } else { if (evList == nullptr) { nEvents = 0; } for (int32_t k = 0; k < nEvents; k++) { - GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); } - GPUFailedMsg(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); } if (ev) { - GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 0 : stream])); + GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 
0 : stream])); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); @@ -518,13 +518,13 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s continue; } if (stream == -1) { - GPUFailedMsg(cudaMemcpy(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice)); + GPUChkErr(cudaMemcpy(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice)); } else { - GPUFailedMsg(cudaMemcpyAsync(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice, mInternals->Streams[stream])); } } if (ev && stream != -1) { - GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); + GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); @@ -533,28 +533,28 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s } void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {} -void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } +void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } std::unique_ptr GPUReconstructionCUDA::GetThreadContext() { - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); return GPUReconstructionProcessing::GetThreadContext(); } -void GPUReconstructionCUDA::SynchronizeGPU() { GPUFailedMsg(cudaDeviceSynchronize()); } -void GPUReconstructionCUDA::SynchronizeStream(int32_t stream) { GPUFailedMsg(cudaStreamSynchronize(mInternals->Streams[stream])); } +void GPUReconstructionCUDA::SynchronizeGPU() { GPUChkErr(cudaDeviceSynchronize()); } +void GPUReconstructionCUDA::SynchronizeStream(int32_t stream) 
{ GPUChkErr(cudaStreamSynchronize(mInternals->Streams[stream])); } void GPUReconstructionCUDA::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(cudaEventSynchronize(evList[i].get())); + GPUChkErr(cudaEventSynchronize(evList[i].get())); } } void GPUReconstructionCUDA::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[stream], evList[i].get(), 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[i].get(), 0)); } } @@ -565,7 +565,7 @@ bool GPUReconstructionCUDA::IsEventDone(deviceEvent* evList, int32_t nEvents) if (retVal == cudaErrorNotReady) { return false; } - GPUFailedMsg(retVal); + GPUChkErr(retVal); } return (true); } @@ -582,7 +582,7 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool if (!force && mProcessingSettings.debugLevel <= 0) { return (0); } - if (GPUFailedMsgI(stream == -1 ? cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { + if (GPUChkErrI(stream == -1 ? 
cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { GPUError("CUDA Error while synchronizing (%s) (Stream %d)", state, stream); return (1); } @@ -597,23 +597,23 @@ int32_t GPUReconstructionCUDA::registerMemoryForGPU_internal(const void* ptr, si if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Registering %zu bytes of memory for GPU", size); } - return GPUFailedMsgI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); + return GPUChkErrI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); } int32_t GPUReconstructionCUDA::unregisterMemoryForGPU_internal(const void* ptr) { - return GPUFailedMsgI(cudaHostUnregister((void*)ptr)); + return GPUChkErrI(cudaHostUnregister((void*)ptr)); } void GPUReconstructionCUDABackend::PrintKernelOccupancies() { int32_t maxBlocks = 0, threads = 0, suggestedBlocks = 0, nRegs = 0, sMem = 0; - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); for (uint32_t i = 0; i < mInternals->kernelFunctions.size(); i++) { - GPUFailedMsg(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy - GPUFailedMsg(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); - GPUFailedMsg(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); - GPUFailedMsg(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); + GPUChkErr(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy + GPUChkErr(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); + GPUChkErr(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); + GPUChkErr(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); 
GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", mInternals->kernelNames[i].c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem); } } @@ -621,14 +621,14 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; -#define GPUCA_KRNL(x_class, ...) \ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ - mInternals->kernelFunctions.emplace_back(new CUfunction); \ - mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ - if (mProcessingSettings.debugLevel >= 3) { \ - GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ - } \ - GPUFailedMsg(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ +#define GPUCA_KRNL(x_class, ...) \ + getRTCkernelNum(mInternals->kernelFunctions.size()); \ + mInternals->kernelFunctions.emplace_back(new CUfunction); \ + mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ + if (mProcessingSettings.debugLevel >= 3) { \ + GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ + } \ + GPUChkErr(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? 
j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ j++; #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -644,27 +644,27 @@ int32_t GPUReconstructionCUDA::PrepareTextures() #ifdef GPUCA_USE_TEXTURES cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc(); size_t offset; - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); + GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc(); - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); + GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); #endif return (0); } void GPUReconstructionCUDA::startGPUProfiling() { - GPUFailedMsg(cudaProfilerStart()); + GPUChkErr(cudaProfilerStart()); } void GPUReconstructionCUDA::endGPUProfiling() { - GPUFailedMsg(cudaProfilerStop()); + GPUChkErr(cudaProfilerStop()); } #else // HIP void* GPUReconstructionHIP::getGPUPointer(void* ptr) { void* retVal = nullptr; - GPUFailedMsg(hipHostGetDevicePointer(&retVal, ptr, 0)); + GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); return retVal; } #endif // __HIPCC__ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index e04e14bd383d3..02e8f92bb2328 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUFailedMsgStatic(const int64_t error, const char* 
file, int32_t line); + static int32_t GPUChkErrStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); } template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 521ca2182c9bb..bc1d573385598 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -36,7 +36,7 @@ using namespace o2::gpu; #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index a6d55c2d729fd..027a9d5445b2c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -49,7 +49,7 @@ class GPUDebugTiming { if (mDo) { if (mDeviceTimers) { - mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[0].get(), mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[0].get(), 
mStreams[mXYZ.x.stream])); } else { mTimer.ResetStart(); } @@ -59,13 +59,13 @@ class GPUDebugTiming { if (mDo && mXYZ.t == 0.) { if (mDeviceTimers) { - mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[1].get(), mStreams[mXYZ.x.stream])); - mRec->GPUFailedMsg(cudaEventSynchronize(mDeviceTimers[1].get())); + mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[1].get(), mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaEventSynchronize(mDeviceTimers[1].get())); float v; - mRec->GPUFailedMsg(cudaEventElapsedTime(&v, mDeviceTimers[0].get(), mDeviceTimers[1].get())); + mRec->GPUChkErr(cudaEventElapsedTime(&v, mDeviceTimers[0].get(), mDeviceTimers[1].get())); mXYZ.t = v * 1.e-3f; } else { - mRec->GPUFailedMsg(cudaStreamSynchronize(mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaStreamSynchronize(mStreams[mXYZ.x.stream])); mXYZ.t = mTimer.GetCurrentElapsedTime(); } } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index a5ab353f3d43f..f60f00c13710d 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -34,7 +34,7 @@ __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUC template <> inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - GPUFailedMsg(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); + GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); } template @@ -56,7 +56,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet #endif pArgs[arg_offset] = &y.index; GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); - GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); + 
GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -67,16 +67,16 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgsStreams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); } } { GPUDebugTiming timer(mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); std::apply([this, &args](auto&... vals) { this->runKernelBackendInternal(args.s, vals...); }, args.v); } - GPUFailedMsg(cudaGetLastError()); + GPUChkErr(cudaGetLastError()); if (z.ev) { - GPUFailedMsg(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream])); + GPUChkErr(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream])); } } @@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 6639c78b113e5..e52494937f8bf 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -49,7 +49,7 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); -int32_t 
GPUReconstructionOCLBackend::GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const +int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { @@ -69,7 +69,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() if (mMaster == nullptr) { cl_int ocl_error; cl_uint num_platforms; - if (GPUFailedMsgI(clGetPlatformIDs(0, nullptr, &num_platforms))) { + if (GPUChkErrI(clGetPlatformIDs(0, nullptr, &num_platforms))) { GPUErrorReturn("Error getting OpenCL Platform Count"); } if (num_platforms == 0) { @@ -82,7 +82,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() // Query platforms and devices std::unique_ptr platforms; platforms.reset(new cl_platform_id[num_platforms]); - if (GPUFailedMsgI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { + if (GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { GPUErrorReturn("Error getting OpenCL Platforms"); } @@ -227,7 +227,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation"); } mInternals->platform = platforms[bestPlatform]; - GPUFailedMsg(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); + GPUChkErr(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); mInternals->device = devices[bestDevice]; queryDevice(mInternals->device); @@ -267,7 +267,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mMaxBackendThreads = std::max(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount); mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Could not create 
OPENCL Device Context!"); } @@ -280,13 +280,13 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { clReleaseContext(mInternals->context); GPUErrorReturn("OPENCL Memory Allocation Error"); } mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { clReleaseMemObject(mInternals->mem_gpu); clReleaseContext(mInternals->context); GPUErrorReturn("OPENCL Constant Memory Allocation Error"); @@ -314,44 +314,44 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() #else mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error); #endif - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating OpenCL command queue"); } } - if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { + if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { GPUErrorReturn("Error migrating buffer"); } - if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { + if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { GPUErrorReturn("Error migrating buffer"); } mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error allocating pinned host memory"); } const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, 
__global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}"; cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating program object"); } ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { char build_log[16384]; clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr); GPUImportant("Build Log:\n\n%s\n\n", build_log); GPUErrorReturn("Error compiling program"); } cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating kernel"); } - if (GPUFailedMsgI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || - GPUFailedMsgI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) || - GPUFailedMsgI(clFinish(mInternals->command_queue[0])) || - GPUFailedMsgI(clReleaseKernel(kernel)) || - GPUFailedMsgI(clReleaseProgram(program))) { + if (GPUChkErrI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || + GPUChkErrI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) || + GPUChkErrI(clFinish(mInternals->command_queue[0])) || + GPUChkErrI(clReleaseKernel(kernel)) || + GPUChkErrI(clReleaseProgram(program))) { GPUErrorReturn("Error obtaining device memory ptr"); } @@ -359,7 +359,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUInfo("Mapping hostmemory"); } mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error); 
- if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error allocating Page Locked Host Memory"); } @@ -435,14 +435,14 @@ size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t } if (size == 0) { if (ev || nEvents) { // Workaround for OCL runtimes, which can throw an error in case size = 0 - GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); } } else if (toGPU == -2) { - GPUFailedMsg(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); } else if (toGPU) { - GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); } else { - GPUFailedMsg(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 
0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); @@ -455,16 +455,16 @@ size_t GPUReconstructionOCLBackend::WriteToConstantMemory(size_t offset, const v if (stream == -1) { SynchronizeGPU(); } - GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); + GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); if (mProcessingSettings.serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); } return size; } -void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUFailedMsg(clReleaseEvent(ev.get())); } +void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get())); } -void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } +void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEvent event) { @@ -490,18 +490,18 @@ int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEve void GPUReconstructionOCLBackend::SynchronizeGPU() { for (int32_t i = 0; i < mNStreams; i++) { - GPUFailedMsg(clFinish(mInternals->command_queue[i])); + GPUChkErr(clFinish(mInternals->command_queue[i])); } } -void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { 
GPUFailedMsg(clFinish(mInternals->command_queue[stream])); } +void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); } -void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUFailedMsg(clWaitForEvents(nEvents, evList->getEventList())); } +void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList())); } void GPUReconstructionOCLBackend::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { if (nEvents) { - GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); + GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); } } @@ -509,7 +509,7 @@ bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEven { cl_int eventdone; for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(clGetEventInfo(evList[i].get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr)); + GPUChkErr(clGetEventInfo(evList[i].get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr)); if (eventdone != CL_COMPLETE) { return false; } @@ -524,7 +524,7 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, return (0); } for (int32_t i = 0; i < mNStreams; i++) { - if (GPUFailedMsgI(clFinish(mInternals->command_queue[i]))) { + if (GPUChkErrI(clFinish(mInternals->command_queue[i]))) { GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i); } } @@ -554,14 +554,14 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error); } - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUError("Error 
creating OpenCL program from binary"); return 1; } - if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) { + if (GPUChkErrI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) { cl_build_status status; - if (GPUFailedMsgI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { + if (GPUChkErrI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { size_t log_size; clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size); std::unique_ptr build_log(new char[log_size + 1]); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 79f54274cd32c..2abae229c74bb 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -39,7 +39,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override; + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; void SynchronizeGPU() override; int32_t DoStuckProtection(int32_t stream, deviceEvent event) override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 8a6c889773cb0..4f6a8725b4be5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -18,7 +18,7 @@ template <> inline void 
GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { cl_int4 val0 = {0, 0, 0, 0}; - GPUFailedMsg(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); + GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); } template @@ -28,7 +28,7 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu auto& x = _xyz.x; auto& y = _xyz.y; auto& z = _xyz.z; - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); + GPUChkErr(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); cl_event ev; cl_event* evr; @@ -39,15 +39,15 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu } else { evr = (cl_event*)z.ev; } - GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); + GPUChkErr(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { cl_ulong time_start, time_end; - GPUFailedMsg(clWaitForEvents(1, evr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); + 
GPUChkErr(clWaitForEvents(1, evr)); + GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); + GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); _xyz.t = (time_end - time_start) * 1.e-9f; if (tmpEvent) { - GPUFailedMsg(clReleaseEvent(ev)); + GPUChkErr(clReleaseEvent(ev)); } } } @@ -80,7 +80,7 @@ int32_t GPUReconstructionOCLBackend::AddKernel() cl_int ocl_error; cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUError("Error creating OPENCL Kernel: %s", name.c_str()); return 1; } From 2fef8797a1b38b4fa1fbb7316481b8bc3062db10 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 14:52:53 +0100 Subject: [PATCH 0070/1764] Fix compiler warning --- Generators/src/GeneratorFromFile.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Generators/src/GeneratorFromFile.cxx b/Generators/src/GeneratorFromFile.cxx index 6d4e85afa6721..e37a3886c24e1 100644 --- a/Generators/src/GeneratorFromFile.cxx +++ b/Generators/src/GeneratorFromFile.cxx @@ -361,7 +361,7 @@ namespace std::vector executeCommand(const std::string& command) { std::vector result; - std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); + std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); if (!pipe) { throw std::runtime_error("Failed to open pipe"); } From 20f1352fa03206c836a48dcb3089a9db1a526f68 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 15:06:29 +0100 Subject: [PATCH 0071/1764] GPU: Clean up more of C++ < 11 compatibility code --- GPU/GPUTracking/Base/GPUConstantMem.h | 2 +- GPU/GPUTracking/Base/GPUGeneralKernels.h | 6 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 4 +- .../Base/GPUReconstructionKernels.h | 8 +-- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 60 ++++++++----------- GPU/GPUTracking/Global/GPUChain.cxx | 12 ++-- 
GPU/GPUTracking/Global/GPUChain.h | 12 ++-- .../SectorTracker/GPUTPCCreateTrackingData.h | 2 +- .../GPUTPCExtrapolationTracking.h | 4 +- .../SectorTracker/GPUTPCNeighboursCleaner.h | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.h | 2 +- .../SectorTracker/GPUTPCStartHitsFinder.h | 2 +- .../SectorTracker/GPUTPCStartHitsSorter.h | 2 +- .../SectorTracker/GPUTPCTrackletConstructor.h | 2 +- .../SectorTracker/GPUTPCTrackletSelector.h | 2 +- .../TRDTracking/GPUTRDTrackerKernels.h | 2 +- 16 files changed, 58 insertions(+), 66 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 8f1cc90f5ae93..e0b06f0a3ea55 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -96,7 +96,7 @@ static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + si #endif } // namespace o2::gpu #if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) -GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; +GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index 71980d38fdc9e..ce93e2e5eead8 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -79,7 +79,7 @@ class GPUKernelTemplate }; typedef GPUconstantref() GPUConstantMem processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors; @@ -94,7 +94,7 @@ class GPUKernelTemplate class GPUMemClean16 : public 
GPUKernelTemplate { public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); }; @@ -103,7 +103,7 @@ class GPUMemClean16 : public GPUKernelTemplate class GPUitoa : public GPUKernelTemplate { public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index f90820281c74d..fd999ec2304e1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -134,8 +134,8 @@ template inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) { HighResTimer* t = nullptr; - GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep(); - if (myStep == GPUCA_RECO_STEP::NoRecoStep) { + GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep(); + if (myStep == GPUDataTypes::RecoStep::NoRecoStep) { throw std::runtime_error("Failure running general kernel without defining RecoStep"); } int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 
2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index ba30f38e902ad..b8f3e3746c743 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -30,14 +30,14 @@ struct classArgument { }; struct krnlExec { - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUCA_RECO_STEP::NoRecoStep) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} uint32_t nBlocks; uint32_t nThreads; int32_t stream; GPUReconstruction::krnlDeviceType device; - GPUCA_RECO_STEP step; + GPUDataTypes::RecoStep step; }; struct krnlRunRange { constexpr krnlRunRange() = default; diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index f7bfe38be988d..6cc1e7266e722 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -96,9 +96,6 
@@ struct TPCPadGainCalib; struct TPCZSLinkMapping; #include "utils/bitfield.h" -#define ENUM_CLASS class -#define ENUM_UINT : uint32_t -#define GPUCA_RECO_STEP GPUDataTypes::RecoStep class GPUTPCTrack; class GPUTPCHitId; @@ -117,36 +114,33 @@ struct GPUSettingsTF; class GPUDataTypes { public: - enum ENUM_CLASS GeometryType ENUM_UINT{RESERVED_GEOMETRY = 0, ALIROOT = 1, O2 = 2}; - enum DeviceType ENUM_UINT { INVALID_DEVICE = 0, - CPU = 1, - CUDA = 2, - HIP = 3, - OCL = 4 }; - enum ENUM_CLASS GeneralStep { Prepare = 1, - QA = 2 }; + // clang-format off + enum class GeometryType : uint32_t { RESERVED_GEOMETRY = 0, ALIROOT = 1, O2 = 2 }; + enum DeviceType : uint32_t { INVALID_DEVICE = 0, CPU = 1, CUDA = 2, HIP = 3, OCL = 4 }; + enum class GeneralStep { Prepare = 1, QA = 2 }; + // clang-format on - enum ENUM_CLASS RecoStep { TPCConversion = 1, - TPCSectorTracking = 2, - TPCMerging = 4, - TPCCompression = 8, - TRDTracking = 16, - ITSTracking = 32, - TPCdEdx = 64, - TPCClusterFinding = 128, - TPCDecompression = 256, - Refit = 512, - AllRecoSteps = 0x7FFFFFFF, - NoRecoStep = 0 }; - enum ENUM_CLASS InOutType { TPCClusters = 1, - OBSOLETE = 2, - TPCMergedTracks = 4, - TPCCompressedClusters = 8, - TRDTracklets = 16, - TRDTracks = 32, - TPCRaw = 64, - ITSClusters = 128, - ITSTracks = 256 }; + enum class RecoStep { TPCConversion = 1, + TPCSectorTracking = 2, + TPCMerging = 4, + TPCCompression = 8, + TRDTracking = 16, + ITSTracking = 32, + TPCdEdx = 64, + TPCClusterFinding = 128, + TPCDecompression = 256, + Refit = 512, + AllRecoSteps = 0x7FFFFFFF, + NoRecoStep = 0 }; + enum class InOutType { TPCClusters = 1, + OBSOLETE = 2, + TPCMergedTracks = 4, + TPCCompressedClusters = 8, + TRDTracklets = 16, + TRDTracks = 32, + TPCRaw = 64, + ITSClusters = 128, + ITSTracks = 256 }; #ifndef __OPENCL__ static constexpr const char* const DEVICE_TYPE_NAMES[] = {"INVALID", "CPU", "CUDA", "HIP", "OCL"}; static constexpr const char* const RECO_STEP_NAMES[] = {"TPC Transformation", "TPC Sector 
Tracking", "TPC Track Merging and Fit", "TPC Compression", "TRD Tracking", "ITS Tracking", "TPC dEdx Computation", "TPC Cluster Finding", "TPC Decompression", "Global Refit"}; @@ -312,8 +306,6 @@ struct GPUTrackingInOutPointers { const GPUSettingsTF* settingsTF = nullptr; }; -#undef ENUM_CLASS -#undef ENUM_UINT } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChain.cxx b/GPU/GPUTracking/Global/GPUChain.cxx index 6990d5e08b638..300de31a509ba 100644 --- a/GPU/GPUTracking/Global/GPUChain.cxx +++ b/GPU/GPUTracking/Global/GPUChain.cxx @@ -18,33 +18,33 @@ using namespace o2::gpu; constexpr GPUChain::krnlRunRange GPUChain::krnlRunRangeNone; constexpr GPUChain::krnlEvent GPUChain::krnlEventNone; -GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { const uint32_t nBlocks = (totalItems + nThreads - 1) / nThreads; return {nBlocks, nThreads, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-1, totalItems, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-2, nBlocks, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st) { return 
{(uint32_t)-2, nBlocks, stream, GPUReconstruction::krnlDeviceType::Auto, st}; } -GPUChain::krnlExec GPUChain::GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-3, 0, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st) { return {(uint32_t)-3, 0, stream, GPUReconstruction::krnlDeviceType::Auto, st}; } diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index b9da1c9a330d3..fff5d2efe0270 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -192,15 +192,15 @@ class GPUChain return mRec->getTimer(name, num); } // Get GRID with NBLOCKS minimal such that nThreads * NBLOCS >= totalItems - krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with NBLOCKS minimal such that ideal number of threads * NBLOCKS >= totalItems - krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with specified number of blocks, each block with ideal number of threads - krnlExec GetGridBlk(uint32_t nBlocks, 
int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); - krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); + krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with ideal number of threads / blocks for GPU - krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); - krnlExec GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); + krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); inline uint32_t BlockCount() const { return mRec->mBlockCount; } inline uint32_t WarpSize() const { return mRec->mWarpSize; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h index 9327699c9404b..dc1beacf79d02 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h @@ -32,7 +32,7 @@ class GPUTPCCreateTrackingData : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static 
processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h index 2d2b275d06399..91a33d132f136 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h @@ -30,7 +30,7 @@ class GPUTPCExtrapolationTracking : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -50,7 +50,7 @@ class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate { public: typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h index 7af6e8eb1a582..de79b268aea78 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h @@ -38,7 +38,7 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return 
GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 54dc0876f8a55..41b5eb8a4ffb8 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -49,7 +49,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h index 5e620180570c8..c834b17369f0f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h @@ -38,7 +38,7 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h index b0349d660dbc1..0e2fd96dd2690 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h @@ -38,7 +38,7 @@ class GPUTPCStartHitsSorter : public 
GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index 8757ed87072da..0f8314ee0fad4 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -97,7 +97,7 @@ class GPUTPCTrackletConstructor GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index bb969d866ef29..5009c672b030e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -42,7 +42,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return 
GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h index 70b525420f294..21135ddc48dfa 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h @@ -26,7 +26,7 @@ class GPUTRDTrackerKernels : public GPUKernelTemplate enum K { defaultKernel = 0, gpuVersion = 0, o2Version = 1 }; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TRDTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TRDTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance = nullptr); }; From 315cfa4216eeda737371a8d4eac108b81e23d881 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 15:06:57 +0100 Subject: [PATCH 0072/1764] GPU: Provide static versions of GPUChkErr() macros test2 GPU: Provide static versions of GPUChkErr() macros --- GPU/Common/CMakeLists.txt | 2 +- GPU/Common/GPUCommonChkErr.h | 30 --------- GPU/Common/GPUCommonDef.h | 24 +++++-- GPU/Common/GPUCommonHelpers.h | 62 +++++++++++++++++++ GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 9 +-- .../Base/cuda/GPUReconstructionCUDA.h | 3 +- .../GPUReconstructionCUDAExternalProvider.cu | 3 +- .../cuda/GPUReconstructionCUDAHelpers.inc | 31 ++++++++++ .../cuda/GPUReconstructionCUDAInternals.h | 2 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +- .../Base/opencl/GPUReconstructionOCL.cxx | 7 +-- .../opencl/GPUReconstructionOCLIncludesHost.h | 2 +- 14 files changed, 127 insertions(+), 56 deletions(-) delete mode 100644 
GPU/Common/GPUCommonChkErr.h create mode 100644 GPU/Common/GPUCommonHelpers.h create mode 100644 GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 8b0a75679479f..bacf4454c39fd 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -15,7 +15,7 @@ set(HDRS_INSTALL GPUCommonAlgorithm.h GPUCommonDef.h GPUCommonDefAPI.h - GPUCommonChkErr.h + GPUCommonHelpers.h GPUCommonDefSettings.h GPUCommonConstants.h GPUCommonLogger.h diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h deleted file mode 100644 index 00cb9e50d302f..0000000000000 --- a/GPU/Common/GPUCommonChkErr.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUCommonChkErr.h -/// \author David Rohr - -// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). -// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code -// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured. -// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. 
-// The Macros must be called ona GPUReconstruction instance, e.g.: -// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } -// gpuRecObj.GPUChkErr(cudaMalloc(...)); - -#ifndef GPUCOMMONCHKERR_H -#define GPUCOMMONCHKERR_H - -// Please #include "GPUReconstruction.h" in your code, if you use these 2! -#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) -#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) - -#endif diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index ae8c48b500b69..b4a788e66a81c 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -35,13 +35,25 @@ #define GPUCA_GPUCODE // Compiled by GPU compiler #endif - #if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__) - #define GPUCA_GPUCODE_DEVICE // Executed on device + #if defined(GPUCA_GPUCODE) + #if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__) + #define GPUCA_GPUCODE_DEVICE // Executed on device + #endif + #if defined(__CUDACC__) + #define GPUCA_GPUTYPE CUDA + #elif defined(__HIPCC__) + #define GPUCA_GPUTYPE HIP + #elif defined(__OPENCL__) || defined(__OPENCL_HOST__) + #define GPUCA_GPUTYPE OCL + #endif #endif #endif +#ifndef GPUCA_GPUTYPE + #define GPUCA_GPUTYPE CPU +#endif #if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined (GPUCA_GPUCODE) - #define GPUCA_ALIGPUCODE + #define GPUCA_ALIGPUCODE // Part of GPUTracking library but not of interface #endif #if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) @@ -51,13 +63,13 @@ #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER) -#define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ + #define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ #else -#define GPUCA_DEBUG_STREAMER_CHECK(...) 
+ #define GPUCA_DEBUG_STREAMER_CHECK(...) #endif #ifndef GPUCA_RTC_SPECIAL_CODE -#define GPUCA_RTC_SPECIAL_CODE(...) + #define GPUCA_RTC_SPECIAL_CODE(...) #endif // API Definitions for GPU Compilation diff --git a/GPU/Common/GPUCommonHelpers.h b/GPU/Common/GPUCommonHelpers.h new file mode 100644 index 0000000000000..ad876db0d6c3a --- /dev/null +++ b/GPU/Common/GPUCommonHelpers.h @@ -0,0 +1,62 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUCommonHelpers.h +/// \author David Rohr + +// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). +// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code +// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occurred. +// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. +// These macros must be called on a GPUReconstruction instance. +// The GPUChkErrS and GPUChkErrSI macros are similar but static, without requiring a GPUReconstruction instance. +// Examples: +// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } +// gpuRecObj.GPUChkErr(cudaMalloc(...)); +// if (GPUChkErrSI(cudaMalloc(...))) { exit(1); } + +#ifndef GPUCOMMONHELPERS_H +#define GPUCOMMONHELPERS_H + +// Please #include "GPUReconstruction.h" in your code, if you use GPUChkErr or GPUChkErrI! 
+#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) +#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) +#define GPUChkErrS(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, true) +#define GPUChkErrSI(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, false) + +#include "GPUCommonDef.h" +#include + +namespace o2::gpu::internal +{ +#define GPUCOMMON_INTERNAL_CAT_A(a, b, c) a##b##c +#define GPUCOMMON_INTERNAL_CAT(...) GPUCOMMON_INTERNAL_CAT_A(__VA_ARGS__) +extern int32_t GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(const int64_t error, const char* file, int32_t line); +inline int32_t GPUReconstructionCPUChkErr(const int64_t error, const char* file, int32_t line) +{ + if (error) { + GPUError("GPUCommon Error Code %ld (%s:%d)", error, file, line); // error is 64 bit, same format as the CUDA / OpenCL messages + } + return error != 0; +} +static inline int32_t GPUReconstructionChkErr(const int64_t error, const char* file, int32_t line, bool failOnError) +{ + int32_t retVal = error && GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(error, file, line); // Backend prints the error and reports 0 / 1 + if (retVal && failOnError) { + throw std::runtime_error("GPU API Call Failure"); + } + return retVal; // 0 or 1 as documented, do not return the raw 64 bit code truncated to 32 bit +} +#undef GPUCOMMON_INTERNAL_CAT_A +#undef GPUCOMMON_INTERNAL_CAT +} // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index a33234db49a27..5b2e53179e50c 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -18,7 +18,7 @@ endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) -set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) +set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc 
GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d30eb51bd4938..d0d5ef4680fac 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -22,6 +22,7 @@ #include "CUDAThrustHelpers.h" #include "GPUReconstructionIncludes.h" #include "GPUParamRTC.h" +#include "GPUReconstructionCUDAHelpers.inc" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" @@ -62,13 +63,9 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUChkErrStatic(const int64_t error, const char* file, int32_t line) +int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { - if (error == cudaSuccess) { - return (0); - } - GPUError("CUDA Error: %ld / %s (%s:%d)", error, cudaGetErrorString((cudaError_t)error), file, line); - return 1; + return internal::GPUReconstructionCUDAChkErr(error, file, line); } GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 02e8f92bb2328..f78270d40146c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,12 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUChkErrStatic(const int64_t error, const char* 
file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; - virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index bc1d573385598..f341a778076b8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -32,11 +32,12 @@ using namespace o2::gpu; #include "TrackParametrizationWithError.cxx" #include "Propagator.cxx" #include "TrackLTIntegral.cxx" +#include "GPUReconstructionCUDAHelpers.inc" #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUChkErrS(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer))) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc new file mode 100644 index 0000000000000..a34f940a1337a --- /dev/null +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc @@ -0,0 +1,31 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionCUDAHelpers.inc +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONCUDAHELPERS_INC_H +#define GPURECONSTRUCTIONCUDAHELPERS_INC_H + +#include "GPUCommonHelpers.h" + +namespace o2::gpu::internal +{ +int32_t __attribute__((weak)) GPUReconstructionCUDAChkErr(const int64_t error, const char* file, int32_t line) +{ + if (error != cudaSuccess) { + GPUError("CUDA Error: %ld / %s (%s:%d)", error, cudaGetErrorString((cudaError_t)error), file, line); + } + return error != cudaSuccess; +} +} // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 027a9d5445b2c..c85d98d85420e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -22,7 +22,7 @@ #include #include #include -#include "GPUCommonChkErr.h" +#include "GPUCommonHelpers.h" namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f60f00c13710d..0c83223ba238a 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, 
__LINE__)) { + if (GPUChkErrS(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer))) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 30f6683ff93c5..21a641c0cc7c0 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesHost.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) @@ -63,7 +63,7 @@ endif() set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) -set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) +set(HDRS 
${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e52494937f8bf..e92205b9864e6 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -52,11 +52,10 @@ static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string - if (error == CL_SUCCESS) { - return (0); + if (error != CL_SUCCESS) { + GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); } - GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); - return 1; + return error != CL_SUCCESS; } void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index 9c8cdbe87c7c1..97316cf9aa32e 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -28,7 +28,7 @@ #include "GPUReconstructionOCL.h" #include "GPUReconstructionIncludes.h" -#include "GPUCommonChkErr.h" +#include "GPUCommonHelpers.h" using namespace o2::gpu; From b8feb4d10f27015e45df85ec17cba5758ad523a5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 
16:15:23 +0100 Subject: [PATCH 0073/1764] GPU: Automatically derive GPUReconstruction backend class from preprocessor constant --- GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h | 4 ++-- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h | 1 - .../Base/opencl/GPUReconstructionOCLKernels.cxx | 1 - GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 ++++---- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index cd1180cbc9991..0b1a501ebc094 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -68,7 +68,7 @@ // GPU Host wrappers for kernel #define GPUCA_KRNL_HOST(x_class, ...) \ GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ - template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal { \ + template <> class GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::backendInternal { \ public: \ template \ static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ @@ -80,7 +80,7 @@ }; #define GPUCA_KRNL_PROP(x_class, x_attributes) \ - template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend() { \ + template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend() { \ gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \ return ret.nThreads > 0 ? 
ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h index 7f77925ca3aaa..4ed352279fb90 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h @@ -34,6 +34,5 @@ #define GPUCA_CONSMEM_CALL me->mDeviceConstantMem, #define GPUCA_CONSMEM ((GPUConstantMem&)(*gGPUConstantMemBuffer)) #endif -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionCUDABackend #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 4f6a8725b4be5..ce6b6553ae1f7 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -109,6 +109,5 @@ int32_t GPUReconstructionOCLBackend::AddKernels() #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ GPUCA_KRNL_PROP(x_class, x_attributes) \ template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCLBackend #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index fa85d796baeba..e6312d767a496 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -769,7 +769,7 @@ struct MergeBorderTracks_compMin { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { thrust::device_ptr 
p(range); ThrustVolatileAsyncAllocator alloc(this); @@ -1873,7 +1873,7 @@ struct GPUTPCGMMergerSortTracksQPt_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); ThrustVolatileAsyncAllocator alloc(this); @@ -1881,7 +1881,7 @@ inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); ThrustVolatileAsyncAllocator alloc(this); @@ -2106,7 +2106,7 @@ struct GPUTPCGMMergerMergeLoopers_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); ThrustVolatileAsyncAllocator alloc(this); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 45293bae9820b..13f204d0f940a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -102,7 +102,7 @@ struct GPUTPCGMO2OutputSort_comp { }; template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); 
ThrustVolatileAsyncAllocator alloc(this); From 154ffd467127ab9eb92649e78cbd2fdfa90f6f68 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 16:33:57 +0100 Subject: [PATCH 0074/1764] GPU: Plenty of clang-format fixes --- GPU/GPUTracking/Base/GPUParam.h | 8 +-- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- .../Base/GPUReconstructionConvert.cxx | 2 +- .../Base/GPUReconstructionProcessing.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/DataTypes/GPUOutputControl.h | 2 +- GPU/GPUTracking/DataTypes/GPUSettings.h | 6 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.h | 6 +- GPU/GPUTracking/Global/GPUChain.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.h | 2 +- .../Global/GPUChainTrackingClusterizer.cxx | 3 +- .../Global/GPUChainTrackingRefit.cxx | 4 +- GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- .../GPUO2InterfaceConfigurableParam.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 16 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 6 +- .../Merger/GPUTPCGMPolynomialFieldManager.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- .../SectorTracker/GPUTPCTracklet.h | 2 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 12 ++-- .../GPUTPCCFNoiseSuppression.cxx | 4 +- .../TRDTracking/GPUTRDInterfaces.h | 4 +- GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h | 18 +++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 24 ++++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 62 +++++++++---------- .../TRDTracking/GPUTRDTrackletWord.h | 2 +- .../display/frontend/GPUDisplayFrontend.h | 8 +-- .../frontend/GPUDisplayFrontendWindows.cxx | 12 ++-- GPU/GPUTracking/qa/GPUQAHelper.h | 6 +- GPU/GPUTracking/qa/genEvents.h | 2 +- GPU/GPUTracking/utils/qconfig.cxx | 8 +-- GPU/GPUTracking/utils/threadserver.h | 4 +- GPU/GPUbenchmark/cuda/Kernels.cu | 6 +- GPU/TPCFastTransformation/BandMatrixSolver.h | 2 +- GPU/TPCFastTransformation/ChebyshevFit1D.cxx | 4 +- .../CorrectionMapsHelper.h | 28 
++++----- .../NDPiecewisePolynomials.h | 14 ++--- GPU/TPCFastTransformation/Spline1DHelper.cxx | 8 +-- GPU/TPCFastTransformation/Spline1DHelperOld.h | 20 +++--- GPU/TPCFastTransformation/Spline1DSpec.cxx | 2 +- GPU/TPCFastTransformation/Spline1DSpec.h | 12 ++-- GPU/TPCFastTransformation/Spline2DSpec.cxx | 2 +- GPU/TPCFastTransformation/SplineHelper.cxx | 44 ++++++------- GPU/TPCFastTransformation/SplineHelper.h | 6 +- GPU/TPCFastTransformation/SplineSpec.h | 14 ++--- .../TPCFastSpaceChargeCorrection.h | 2 +- .../IrregularSpline2D3DCalibrator.cxx | 2 +- .../devtools/RegularSpline1D.h | 12 ++-- .../devtools/SemiregularSpline2D3D.cxx | 8 +-- .../devtools/SemiregularSpline2D3D.h | 32 +++++----- .../test/testMultivarPolynomials.cxx | 4 +- GPU/Utils/FlatObject.h | 12 ++-- GPU/Workflow/helper/src/GPUWorkflowHelper.cxx | 26 ++++---- 54 files changed, 250 insertions(+), 255 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 9bdf705dfeb59..fbce6246de112 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -59,10 +59,10 @@ struct GPUParam_t { int32_t continuousMaxTimeBin; int32_t tpcCutTimeBin; - GPUTPCGeometry tpcGeometry; // TPC Geometry - GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM - const uint32_t* occupancyMap; // Ptr to TPC occupancy map - uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) + GPUTPCGeometry tpcGeometry; // TPC Geometry + GPUTPCGMPolynomialField polynomialField; // Polynomial approx. 
of magnetic field for TPC GM + const uint32_t* occupancyMap; // Ptr to TPC occupancy map + uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) GPUParamSector SectorParam[GPUCA_NSECTORS]; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index f363f3f58aa6f..5e03c77f08230 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -52,7 +52,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent; class threadContext; -} +} // namespace gpu_reconstruction_kernels class GPUReconstruction { @@ -193,7 +193,7 @@ class GPUReconstruction bool IsInitialized() const { return mInitialized; } void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr); void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr); - void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() + void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings() void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr); void UpdateDynamicSettings(const GPUSettingsRecDynamic* d); diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index 8f5cab6807050..bc760f6188caa 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -923,7 +923,7 @@ void zsEncoderDenseLinkBased::decodePage(std::vector& outputBuff if (decLinkX & 0b00100000) { bitmaskL2.set(); } else { - bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) 
| (uint16_t) * ((const uint8_t*)decPagePtr)); + bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t)*((const uint8_t*)decPagePtr)); decPagePtr += sizeof(uint8_t); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 4ccfb9ff10311..43560616782db 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -28,7 +28,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent { constexpr deviceEvent() = default; - constexpr deviceEvent(std::nullptr_t p) : v(nullptr){}; + constexpr deviceEvent(std::nullptr_t p) : v(nullptr) {}; template void set(T val) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d0d5ef4680fac..202edd49bc44c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -496,7 +496,7 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, for (int32_t k = 0; k < nEvents; k++) { GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); } - GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : (toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost), mInternals->Streams[stream])); } if (ev) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 
0 : stream])); diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index 799fd25330ab4..0495f7ed1d0ff 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -78,7 +78,7 @@ struct GPUTrackingOutputs { static constexpr size_t count() { return sizeof(GPUTrackingOutputs) / sizeof(GPUOutputControl); } GPUOutputControl* asArray() { return (GPUOutputControl*)this; } size_t getIndex(const GPUOutputControl& v) { return &v - (const GPUOutputControl*)this; } - static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::*v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } + static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::* v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index 05888770ef9e5..c81a8e20e9926 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -73,9 +73,9 @@ struct GPUSettingsTF { // Settings defining the setup of the GPUReconstruction processing (basically selecting the device / class instance) struct GPUSettingsDeviceBackend { - uint32_t deviceType = GPUDataTypes::DeviceType::CPU; // Device type, shall use GPUDataTypes::DEVICE_TYPE constants, e.g. CPU / CUDA - uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU - GPUReconstruction* master = nullptr; // GPUReconstruction master object + uint32_t deviceType = GPUDataTypes::DeviceType::CPU; // Device type, shall use GPUDataTypes::DEVICE_TYPE constants, e.g. 
CPU / CUDA + uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU + GPUReconstruction* master = nullptr; // GPUReconstruction master object }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 18f7c61e01fc3..b358e8b82d480 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -107,9 +107,9 @@ class GPUTRDTrack_t : public T GPUd() void setHasPadrowCrossing() { mIsCrossingNeighbor |= (1U << 7); } protected: - float mChi2; // total chi2. - float mSignal{-1.f}; // electron Likelihood for track - uint32_t mRefGlobalTrackId; // raw GlobalTrackID of the seeding track (either ITS-TPC or TPC) + float mChi2; // total chi2. + float mSignal{-1.f}; // electron Likelihood for track + uint32_t mRefGlobalTrackId; // raw GlobalTrackID of the seeding track (either ITS-TPC or TPC) int32_t mAttachedTracklets[kNLayers]; // indices of the tracklets attached to this track; -1 means no tracklet in that layer int16_t mCollisionId; // the collision ID of the tracklets attached to this track; is used to retrieve the BC information for this track after the tracking is done uint8_t mFlags; // bits 0 to 5 indicate whether track is findable in layer 0 to 5, bit 6 indicates an ambiguous track and bit 7 flags if the track is stopped in the TRD diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index fff5d2efe0270..290ae32cafca8 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -46,7 +46,7 @@ class GPUChain virtual int32_t Finalize() = 0; virtual int32_t RunChain() = 0; virtual void MemorySize(size_t& gpuMem, size_t& pageLockedHostMem) = 0; - virtual void PrintMemoryStatistics(){}; + virtual void PrintMemoryStatistics() {}; virtual int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr) { return 0; } virtual 
bool SupportsDoublePipeline() { return false; } virtual int32_t FinalizePipelinedProcessing() { return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 492ee65d1c9c1..5779cec31130c 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -43,7 +43,7 @@ class MatLayerCylSet; namespace o2::gpu { -//class GPUTRDTrackerGPU; +// class GPUTRDTrackerGPU; class GPUTPCGPUTracker; class GPUDisplayInterface; class GPUQA; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 63d56da37595b..a48050a6cacbc 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -154,8 +154,7 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS]; memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples)); bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) - : 0; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? 
o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0); for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { #ifndef GPUCA_NO_VC diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 9e7085b31849e..8d1efd7011227 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -31,13 +31,13 @@ int32_t GPUChainTracking::RunRefit() RefitShadow.SetPropagator(doGPU ? processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.OutputTracks(); WriteToConstantMemory(RecoStep::Refit, (char*)&processors()->trackingRefit - (char*)processors(), &RefitShadow, sizeof(RefitShadow), 0); - //TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); + // TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); if (param().rec.trackingRefitGPUModel) { runKernel(GetGrid(mIOPtrs.nMergedTracks, 0)); } else { runKernel(GetGrid(mIOPtrs.nMergedTracks, 0)); } - //TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); + // TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); SynchronizeStream(0); return 0; } diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 34cd5b7280dc3..4dac56afed671 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -46,7 +46,7 @@ struct GPUO2Interface_Internals { }; } // namespace o2::gpu -GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals){}; +GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals) {}; GPUO2Interface::~GPUO2Interface() { Deinitialize(); } diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h index 
425c8b880b4e3..ebb426b7a8cfe 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h @@ -50,7 +50,7 @@ #define AddSubConfig(name, instance) #define BeginSubConfig(name, instance, parent, preoptname, preoptnameshort, descr, o2prefix) \ struct GPUCA_M_CAT(GPUConfigurableParam, name) : public o2::conf::ConfigurableParamHelper { \ - O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: + O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: #define BeginHiddenConfig(name, instance) struct GPUCA_M_CAT(GPUConfigurableParam, name) { #define EndConfig() \ } \ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 64d8549312736..578fe1eeb4ca7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -106,17 +106,17 @@ class GPUTPCGMMergedTrack GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } private: - GPUTPCGMTrackParam mParam; //* fitted track parameters + GPUTPCGMTrackParam mParam; //* fitted track parameters gputpcgmmergertypes::GPUTPCOuterParam mOuterParam; //* outer param - float mAlpha; //* alpha angle - float mLastX; //* outer X - float mLastY; //* outer Y - float mLastZ; //* outer Z - uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays + float mAlpha; //* alpha angle + float mLastX; //* outer X + float mLastY; //* outer Y + float mLastZ; //* outer Z + uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays // TODO: Change to 8 bit - uint32_t mNClusters; //* number of track clusters - uint32_t mNClustersFitted; //* number of clusters used in fit + uint32_t mNClusters; //* number of track clusters + uint32_t mNClustersFitted; //* number of clusters used in fit uint8_t 
mFlags; uint8_t mLegs; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index e6312d767a496..1c2a8e2b29a9c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -2140,7 +2140,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, } const float d2xy = CAMath::Sum2(params[i].x - params[j].x, params[i].y - params[j].y); if (d2xy > 15.f) { - //bs |= 1; + // bs |= 1; continue; } const auto& trk1 = mOutputTracks[params[i].id]; @@ -2148,7 +2148,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { - //bs |= 2; + // bs |= 2; continue; } @@ -2170,7 +2170,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, } } if (!dzcorrok) { - //bs |= 4; + // bs |= 4; continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h index 88f0882a79f03..4a608fcc97068 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h @@ -39,11 +39,11 @@ class GPUTPCGMPolynomialFieldManager GPUTPCGMPolynomialFieldManager() = default; /* Get appropriate pre-calculated polynomial field for the given field value nominalFieldkG - */ + */ static int32_t GetPolynomialField(float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); /* Get pre-calculated polynomial field of type "type", scaled with respect to nominalFieldkG - */ + */ static int32_t GetPolynomialField(StoredField_t type, float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx 
b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index e15d6fe8b17bd..0c171a74d4e42 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -716,7 +716,7 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict const float ImP1 = mP[1] + Ik11 * Iz1; const float ImC0 = mC[0] - Ik00 * mC[0]; const float ImC2 = mC[2] - Ik11 * mC[2]; - //printf("\t%21sInterpo ----- abde artaf%16s Y %8.3f, Z %8.3f (Errors %f <-- (%f, %f) %f <-- (%f, %f))\n", "", "", ImP0, ImP1, sqrtf(ImC0), sqrtf(mC[0]), sqrtf(inter->errorY), sqrtf(ImC2), sqrtf(mC[2]), sqrtf(inter->errorZ)); + // printf("\t%21sInterpo ----- abde artaf%16s Y %8.3f, Z %8.3f (Errors %f <-- (%f, %f) %f <-- (%f, %f))\n", "", "", ImP0, ImP1, sqrtf(ImC0), sqrtf(mC[0]), sqrtf(inter->errorY), sqrtf(ImC2), sqrtf(mC[2]), sqrtf(inter->errorZ)); const float Jz0 = posY - ImP0; const float Jz1 = posZ - ImP1; const float Jw0 = 1.f / (ImC0 + err2Y); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 9d10d40107b8f..502a70cb57762 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -256,7 +256,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov int32_t nAvgCharge = 0; for (int32_t i = start; i != stop; i += cl ? 0 : direction) { - float x = 0, y = 0, z = 0, charge = 0; // FIXME: initialization unneeded, but GCC incorrectly produces uninitialized warnings otherwise + float x = 0, y = 0, z = 0, charge = 0; // FIXME: initialization unneeded, but GCC incorrectly produces uninitialized warnings otherwise float time = 0.f, invCharge = 0.f, invSqrtCharge = 0.f; // Same here... 
int32_t clusters = 0; while (true) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 10ff0a32aeaf3..5bb63d6a10254 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -30,7 +30,7 @@ class GPUTPCTracklet { public: #if !defined(GPUCA_GPUCODE) - GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0){}; + GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0) {}; #endif //! GPUCA_GPUCODE GPUhd() int32_t FirstRow() const { return mFirstRow; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index e7634fa397bae..f1fd95d696f5d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -101,7 +101,7 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2); for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) { // TODO: Parallelize over time bins - pagePtr += (pagePtr - page) & 1; // Ensure 16 bit alignment + pagePtr += (pagePtr - page) & 1; // Ensure 16 bit alignment const TPCZSTBHDR* tbHdr = reinterpret_cast(pagePtr); if ((tbHdr->rowMask & 0x7FFF) == 0) { pagePtr += 2; @@ -324,8 +324,8 @@ GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread( bits -= DECODE_BITS; nSamplesWritten++; rawFECChannel++; // Ensure we don't decode same channel twice - } // while (bits >= DECODE_BITS) - } // while (nSamplesWritten < nAdc) + } // while (bits >= DECODE_BITS) + } // while (nSamplesWritten < nAdc) } else { // ! 
TPCZSHDRV2::TIGHTLY_PACKED_V3 uint32_t rawFECChannel = 0; @@ -705,7 +705,7 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( #define PEEK_OVERFLOW(pagePtr, offset) \ (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \ - ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset)-payloadEnd) \ + ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset) - payloadEnd) \ : (pagePtr) + (offset))) #define TEST_BIT(x, bit) static_cast((x) & (1 << (bit))) @@ -931,8 +931,8 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( bits -= DECODE_BITS; nSamplesWritten++; rawFECChannel++; // Ensure we don't decode same channel twice - } // while (bits >= DECODE_BITS) - } // while (nSamplesWritten < nAdc) + } // while (bits >= DECODE_BITS) + } // while (nSamplesWritten < nAdc) assert(PayloadExtendsToNextPage || adcData <= page); assert(PayloadExtendsToNextPage || page <= payloadEnd); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx index 05fddda5bec68..f3a914cbfcaee 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx @@ -316,8 +316,8 @@ GPUd() void GPUTPCCFNoiseSuppression::findMinimaAndPeaks( uint8_t* bufp = (uint8_t*)buf; /************************************** - * Look for peaks - **************************************/ + * Look for peaks + **************************************/ CfUtils::blockLoad( peakMap, diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index 24624e60ceba7..f6b8bea29822a 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -45,7 +45,7 @@ class propagatorInterface { public: typedef o2::base::Propagator propagatorParam; - GPUd() propagatorInterface(const propagatorParam* prop) 
: mProp(prop){}; + GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop) {}; GPUd() propagatorInterface(const propagatorInterface&) = delete; GPUd() propagatorInterface& operator=(const propagatorInterface&) = delete; @@ -200,7 +200,7 @@ class propagatorInterface : public GPUTPCGMPropagator } GPUd() bool propagateToX(float x, float maxSnp, float maxStep) { - //bool ok = PropagateToXAlpha(x, GetAlpha(), true) == 0 ? true : false; + // bool ok = PropagateToXAlpha(x, GetAlpha(), true) == 0 ? true : false; int32_t retVal = PropagateToXAlpha(x, GetAlpha(), true); bool ok = (retVal == 0) ? true : false; ok = mTrack->CheckNumericalQuality(); diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h index 6a6e13fe84e36..ee7d7a30b1c55 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h @@ -18,15 +18,15 @@ #define GPUTRDTRACKDATA_H struct GPUTRDTrackDataRecord { - float mAlpha; // azimuthal angle of reference frame - float fX; // x: radial distance - float fY; // local Y-coordinate of a track (cm) - float fZ; // local Z-coordinate of a track (cm) - float mSinPhi; // local sine of the track momentum azimuthal angle - float fTgl; // tangent of the track momentum dip angle - float fq1Pt; // 1/pt (1/(GeV/c)) - float fC[15]; // covariance matrix - int32_t fTPCTrackID; // id of corresponding TPC track + float mAlpha; // azimuthal angle of reference frame + float fX; // x: radial distance + float fY; // local Y-coordinate of a track (cm) + float fZ; // local Z-coordinate of a track (cm) + float mSinPhi; // local sine of the track momentum azimuthal angle + float fTgl; // tangent of the track momentum dip angle + float fq1Pt; // 1/pt (1/(GeV/c)) + float fC[15]; // covariance matrix + int32_t fTPCTrackID; // id of corresponding TPC track int32_t fAttachedTracklets[6]; // IDs for attached tracklets sorted by layer uint8_t mIsPadrowCrossing; // bits 0 to 5 
indicate whether a padrow was crossed diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index fa0711887f60f..c633f10adae38 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -12,7 +12,7 @@ /// \file GPUTRDTracker.cxx /// \author Ole Schmidt -//#define ENABLE_GPUTRDDEBUG +// #define ENABLE_GPUTRDDEBUG #define ENABLE_WARNING 0 #define ENABLE_INFO 0 @@ -326,7 +326,6 @@ GPUd() int32_t GPUTRDTracker_t::LoadTrack(const TRDTRK& trk, uint3 return (0); } - template GPUd() void GPUTRDTracker_t::DumpTracks() { @@ -439,19 +438,19 @@ GPUd() bool GPUTRDTracker_t::CalculateSpacePoints(int32_t iCollisi int32_t trkltIdxStart = trkltIdxOffset + iFirstTrackletInDet; for (int32_t trkltIdx = trkltIdxStart; trkltIdx < trkltIdxStart + nTrackletsInDet; ++trkltIdx) { int32_t trkltZbin = tracklets[trkltIdx].GetZbin(); - float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates - float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates + float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates + float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates xTrkltDet[0] = mGeo->AnodePos() + sRadialOffset; xTrkltDet[1] = tracklets[trkltIdx].GetY(); xTrkltDet[2] = pp->GetRowPos(trkltZbin) - pp->GetRowSize(trkltZbin) / 2.f - pp->GetRowPos(pp->GetNrows() / 2); - //GPUInfo("Space point local %i: x=%f, y=%f, z=%f", trkltIdx, xTrkltDet[0], xTrkltDet[1], xTrkltDet[2]); + // GPUInfo("Space point local %i: x=%f, y=%f, z=%f", trkltIdx, xTrkltDet[0], xTrkltDet[1], xTrkltDet[2]); matrix->LocalToMaster(xTrkltDet, xTrkltSec); mSpacePoints[trkltIdx].setX(xTrkltSec[0]); mSpacePoints[trkltIdx].setY(xTrkltSec[1]); mSpacePoints[trkltIdx].setZ(xTrkltSec[2]); mSpacePoints[trkltIdx].setDy(tracklets[trkltIdx].GetdY()); - //GPUInfo("Space point global %i: x=%f, y=%f, z=%f", trkltIdx, mSpacePoints[trkltIdx].getX(), mSpacePoints[trkltIdx].getY(), 
mSpacePoints[trkltIdx].getZ()); + // GPUInfo("Space point global %i: x=%f, y=%f, z=%f", trkltIdx, mSpacePoints[trkltIdx].getX(), mSpacePoints[trkltIdx].getY(), mSpacePoints[trkltIdx].getZ()); } } return result; @@ -475,10 +474,10 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK float zShiftTrk = 0.f; if (mProcessPerTimeFrame) { zShiftTrk = (mTrackAttribs[iTrk].mTime - GetConstantMem()->ioPtrs.trdTriggerTimes[collisionId]) * mTPCVdrift * mTrackAttribs[iTrk].mSide; - //float addZerr = (mTrackAttribs[iTrk].mTimeAddMax + mTrackAttribs[iTrk].mTimeSubMax) * .5f * mTPCVdrift; - // increase Z error based on time window - // -> this is here since it was done before, but the efficiency seems to be better if the covariance is not updated (more tracklets are attached) - //t->updateCovZ2(addZerr * addZerr); // TODO check again once detailed performance study tools are available, maybe this can be tuned + // float addZerr = (mTrackAttribs[iTrk].mTimeAddMax + mTrackAttribs[iTrk].mTimeSubMax) * .5f * mTPCVdrift; + // increase Z error based on time window + // -> this is here since it was done before, but the efficiency seems to be better if the covariance is not updated (more tracklets are attached) + // t->updateCovZ2(addZerr * addZerr); // TODO check again once detailed performance study tools are available, maybe this can be tuned } const GPUTRDpadPlane* pad = nullptr; const GPUTRDTrackletWord* tracklets = GetConstantMem()->ioPtrs.trdTracklets; @@ -637,7 +636,7 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK } Hypothesis hypo(trkWork->getNlayersFindable(), iCandidate, trkltIdx, trkWork->getChi2() + chi2); InsertHypothesis(hypo, nCurrHypothesis, hypothesisIdxOffset); - } // end tracklet in window + } // end tracklet in window } // tracklet loop } // chamber loop @@ -723,7 +722,7 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK #ifdef ENABLE_GPUTRDDEBUG prop->setTrack(&trackNoUp); 
prop->rotate(GetAlphaOfSector(trkltSec)); - //prop->propagateToX(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getX(), .8f, 2.f); + // prop->propagateToX(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getX(), .8f, 2.f); prop->propagateToX(mR[tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetDetector()], .8f, 2.f); prop->setTrack(trkWork); #endif @@ -1103,7 +1102,6 @@ GPUd() bool GPUTRDTracker_t::IsGeoFindable(const TRDTRK* t, const return true; } - #ifndef GPUCA_GPUCODE namespace o2::gpu { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 29a9b529b0558..431fa357e8b89 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -85,7 +85,7 @@ class GPUTRDTracker_t : public GPUProcessor int32_t mLayers; // number of layers with TRD space point int32_t mCandidateId; // to which track candidate the hypothesis belongs int32_t mTrackletId; // tracklet index to be used for update (global index within tracklet array) - float mChi2; // predicted chi2 for given space point + float mChi2; // predicted chi2 for given space point GPUd() float GetReducedChi2() { return mLayers > 0 ? 
mChi2 / mLayers : mChi2; } GPUd() Hypothesis() : mLayers(0), mCandidateId(-1), mTrackletId(-1), mChi2(9999.f) {} @@ -148,32 +148,32 @@ class GPUTRDTracker_t : public GPUProcessor GPUd() const typename PROP::propagatorParam* getPropagatorParam(); protected: - float* mR; // radial position of each TRD chamber, alignment taken into account, radial spread within chambers < 7mm - bool mIsInitialized; // flag is set upon initialization - bool mGenerateSpacePoints; // if true, only tracklets are provided as input and they will be converted into space points by the tracker - bool mProcessPerTimeFrame; // if true, tracking is done per time frame instead of on a single events basis - int16_t mNAngleHistogramBins; // number of bins per chamber for the angular difference histograms - float mAngleHistogramRange; // range of impact angles covered by each histogram - int16_t mMemoryPermanent; // memory id of permanent memory for the tracker - int16_t mMemoryTracklets; // memory id of memory for TRD tracklets - int16_t mMemoryTracks; // memory id of memory for tracks (used for i/o) - int32_t mNMaxCollisions; // max number of collisions to process (per time frame) - int32_t mNMaxTracks; // max number of tracks the tracker can handle (per event) - int32_t mNMaxSpacePoints; // max number of space points hold by the tracker (per event) - TRDTRK* mTracks; // array of trd-updated tracks - HelperTrackAttributes* mTrackAttribs; // array with additional (transient) track attributes - int32_t mNCandidates; // max. 
track hypothesis per layer - int32_t mNTracks; // number of TPC tracks to be matched - int32_t mNEvents; // number of processed events - int32_t mMaxBackendThreads; // maximum number of supported threads + float* mR; // radial position of each TRD chamber, alignment taken into account, radial spread within chambers < 7mm + bool mIsInitialized; // flag is set upon initialization + bool mGenerateSpacePoints; // if true, only tracklets are provided as input and they will be converted into space points by the tracker + bool mProcessPerTimeFrame; // if true, tracking is done per time frame instead of on a single events basis + int16_t mNAngleHistogramBins; // number of bins per chamber for the angular difference histograms + float mAngleHistogramRange; // range of impact angles covered by each histogram + int16_t mMemoryPermanent; // memory id of permanent memory for the tracker + int16_t mMemoryTracklets; // memory id of memory for TRD tracklets + int16_t mMemoryTracks; // memory id of memory for tracks (used for i/o) + int32_t mNMaxCollisions; // max number of collisions to process (per time frame) + int32_t mNMaxTracks; // max number of tracks the tracker can handle (per event) + int32_t mNMaxSpacePoints; // max number of space points hold by the tracker (per event) + TRDTRK* mTracks; // array of trd-updated tracks + HelperTrackAttributes* mTrackAttribs; // array with additional (transient) track attributes + int32_t mNCandidates; // max. 
track hypothesis per layer + int32_t mNTracks; // number of TPC tracks to be matched + int32_t mNEvents; // number of processed events + int32_t mMaxBackendThreads; // maximum number of supported threads // index of first tracklet for each chamber within tracklets array, last entry is total number of tracklets for given collision // the array has (kNChambers + 1) * numberOfCollisions entries // note, that for collision iColl one has to add an offset corresponding to the index of the first tracklet of iColl to the index stored in mTrackletIndexArray int32_t* mTrackletIndexArray; - Hypothesis* mHypothesis; // array with multiple track hypothesis - TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking - GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame - const GPUTRDGeometry* mGeo; // TRD geometry + Hypothesis* mHypothesis; // array with multiple track hypothesis + TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking + GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame + const GPUTRDGeometry* mGeo; // TRD geometry /// ---- error parametrization depending on magnetic field ---- float mRPhiA2; // parameterization for tracklet position resolution float mRPhiB; // parameterization for tracklet position resolution @@ -185,14 +185,14 @@ class GPUTRDTracker_t : public GPUProcessor float mAngleToDyB; // parameterization for conversion track angle -> tracklet deflection float mAngleToDyC; // parameterization for conversion track angle -> tracklet deflection /// ---- end error parametrization ---- - bool mDebugOutput; // store debug output - static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker - float mMaxEta; // TPC tracks with higher eta are ignored - float mRoadZ; // in z, a constant search road is used - float mZCorrCoefNRC; // 
tracklet z-position depends linearly on track dip angle - float mTPCVdrift; // TPC drift velocity used for shifting TPC tracks along Z - float mTPCTDriftOffset; // TPC drift time additive offset - GPUTRDTrackerDebug* mDebug; // debug output + bool mDebugOutput; // store debug output + static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker + float mMaxEta; // TPC tracks with higher eta are ignored + float mRoadZ; // in z, a constant search road is used + float mZCorrCoefNRC; // tracklet z-position depends linearly on track dip angle + float mTPCVdrift; // TPC drift velocity used for shifting TPC tracks along Z + float mTPCTDriftOffset; // TPC drift time additive offset + GPUTRDTrackerDebug* mDebug; // debug output }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index fc874070ec9b8..cd7dfb9432b93 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -82,7 +82,7 @@ namespace o2::gpu class GPUTRDTrackletWord : private o2::trd::Tracklet64 { public: - GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord){}; + GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord) {}; GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() GPUTRDTrackletWord& operator=(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() ~GPUTRDTrackletWord() = default; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h index ceb63e788564a..9087ec9a431f6 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h @@ -138,12 +138,12 @@ class GPUDisplayFrontend : public 
GPUDisplayFrontendInterface std::unique_ptr mGUI; - void HandleKey(uint8_t key); // Callback for handling key presses - int32_t DrawGLScene(); // Callback to draw the GL scene - void HandleSendKey(); // Optional callback to handle key press from external source (e.g. stdin by default) + void HandleKey(uint8_t key); // Callback for handling key presses + int32_t DrawGLScene(); // Callback to draw the GL scene + void HandleSendKey(); // Optional callback to handle key press from external source (e.g. stdin by default) void ResizeScene(int32_t width, int32_t height); // Callback when GL window is resized int32_t InitDisplay(bool initFailure = false); // Callback to initialize the GL Display (to be called in StartDisplay) - void ExitDisplay(); // Callback to clean up the GL Display + void ExitDisplay(); // Callback to clean up the GL Display int32_t& drawTextFontSize(); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx index 8d48536e0a351..e511718e258f7 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx @@ -80,12 +80,12 @@ void KillGLWindow() // Properly Kill The Window BOOL CreateGLWindow(char* title, int32_t width, int32_t height, int32_t bits, bool fullscreenflag) { - GLuint PixelFormat; // Holds The Results After Searching For A Match - WNDCLASS wc; // Windows Class Structure - DWORD dwExStyle; // Window Extended Style - DWORD dwStyle; // Window Style - RECT WindowRect; // Grabs Rectangle Upper Left / Lower Right Values - WindowRect.left = (int64_t)0; // Set Left Value To 0 + GLuint PixelFormat; // Holds The Results After Searching For A Match + WNDCLASS wc; // Windows Class Structure + DWORD dwExStyle; // Window Extended Style + DWORD dwStyle; // Window Style + RECT WindowRect; // Grabs Rectangle Upper Left / Lower Right Values + WindowRect.left = 
(int64_t)0; // Set Left Value To 0 WindowRect.right = (int64_t)width; // Set Right Value To Requested Width WindowRect.top = (int64_t)0; // Set Top Value To 0 WindowRect.bottom = (int64_t)height; // Set Bottom Value To Requested Height diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index 92da6bbac94e8..a7811c6fd55ed 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -91,7 +91,7 @@ class GPUTPCTrkLbl inline U computeLabel(float* labelWeight = nullptr, float* totalWeight = nullptr, int32_t* maxCount = nullptr) { if (mLabels.size() == 0) { - return U(); //default constructor creates NotSet label + return U(); // default constructor creates NotSet label } else { uint32_t bestLabelNum = 0, bestLabelCount = 0; for (uint32_t j = 0; j < mLabels.size(); j++) { @@ -133,10 +133,10 @@ struct GPUTPCTrkLbl_ret { template GPUTPCTrkLbl_ret(T){}; #ifdef GPUCA_TPC_GEOMETRY_O2 - GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()){}; + GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()) {}; #endif #ifdef GPUCA_STANDALONE - GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID){}; + GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID) {}; #endif void setFakeFlag() { diff --git a/GPU/GPUTracking/qa/genEvents.h b/GPU/GPUTracking/qa/genEvents.h index fb3c5f22d61ef..43b946e6238b2 100644 --- a/GPU/GPUTracking/qa/genEvents.h +++ b/GPU/GPUTracking/qa/genEvents.h @@ -31,7 +31,7 @@ class genEvents int32_t GenerateEvent(const GPUParam& sectorParam, char* filename) { return 1; } void FinishEventGenerator() {} - static void RunEventGenerator(GPUChainTracking* rec){}; + static void RunEventGenerator(GPUChainTracking* rec) {}; }; #else diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cd6267179c844..cdb41ec5813f2 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -32,8 +32,7 @@ namespace 
qConfig { #define QCONFIG_SETTING(name, type) \ - struct qon_mxcat3(q, name, _t) \ - { \ + struct qon_mxcat3(q, name, _t) { \ type v; \ constexpr qon_mxcat3(q, name, _t)(type s) : v(s) {} \ }; \ @@ -41,8 +40,7 @@ namespace qConfig #define QCONFIG_SETTING_TEMPLATE(name) \ template \ - struct qon_mxcat3(q, name, _t) \ - { \ + struct qon_mxcat3(q, name, _t) { \ T v; \ constexpr qon_mxcat3(q, name, _t)(const T& s) : v(s) {} \ }; \ @@ -68,7 +66,7 @@ static inline const char* getOptName(const char** argv, int32_t i) template struct qConfigSettings { - qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false){}; + qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false) {}; template qConfigSettings(const qConfigSettings v) : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(v.message), allowEmpty(v.allowEmpty){}; bool checkMin, checkMax; diff --git a/GPU/GPUTracking/utils/threadserver.h b/GPU/GPUTracking/utils/threadserver.h index 606531f46f201..c8dfe831fd578 100644 --- a/GPU/GPUTracking/utils/threadserver.h +++ b/GPU/GPUTracking/utils/threadserver.h @@ -98,7 +98,7 @@ class qThreadCls qThreadParamCls& XthreadParam = *((qThreadParamCls*)&this->threadParam); XthreadParam.pCls = pCls; - XthreadParam.pFunc = (void (S::*)(void*))pFunc; + XthreadParam.pFunc = (void(S::*)(void*))pFunc; XthreadParam.threadNum = threadNum; XthreadParam.pinCPU = pinCPU; pthread_t thr; @@ -150,7 +150,7 @@ void* qThreadCls::qThreadWrapperCls(T* arg) sched_setaffinity(0, sizeof(tmp_mask), &tmp_mask); } - void (S::*pFunc)(T*) = (void (S::*)(T*))arg_A->pFunc; + void (S::*pFunc)(T*) = (void(S::*)(T*))arg_A->pFunc; (arg_A->pCls->*pFunc)(arg); arg_A->threadMutex[1].Unlock(); diff --git a/GPU/GPUbenchmark/cuda/Kernels.cu b/GPU/GPUbenchmark/cuda/Kernels.cu index 75799e4aa8c96..c309e7b2dbc5d 
100644 --- a/GPU/GPUbenchmark/cuda/Kernels.cu +++ b/GPU/GPUbenchmark/cuda/Kernels.cu @@ -666,9 +666,9 @@ void GPUbenchmark::runTest(Test test, Mode mode, KernelConfig config) } nThreads *= mOptions.threadPoolFraction; - void (*kernel)(chunk_t*, size_t) = &gpu::read_k; // Initialising to a default value - void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k; // Initialising to a default value - void (*kernel_rand)(chunk_t*, size_t, int32_t) = &gpu::rand_read_k; // Initialising to a default value + void (*kernel)(chunk_t*, size_t) = &gpu::read_k; // Initialising to a default value + void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k; // Initialising to a default value + void (*kernel_rand)(chunk_t*, size_t, int32_t) = &gpu::rand_read_k; // Initialising to a default value void (*kernel_rand_distributed)(chunk_t**, size_t*, int32_t) = &gpu::rand_read_dist_k; // Initialising to a default value bool is_random{false}; diff --git a/GPU/TPCFastTransformation/BandMatrixSolver.h b/GPU/TPCFastTransformation/BandMatrixSolver.h index f11f538e49275..7de44fe4b85e2 100644 --- a/GPU/TPCFastTransformation/BandMatrixSolver.h +++ b/GPU/TPCFastTransformation/BandMatrixSolver.h @@ -131,7 +131,7 @@ inline void BandMatrixSolver::triangulateBlock(double AA[], double b A[0] = c; // store 1/a[0][0] double* rowi = A + BandWidthT - 1; for (int32_t i = 1; i < m; i++) { // row 0+i - double ai = c * A[i]; // A[0][i] + double ai = c * A[i]; // A[0][i] for (int32_t j = i; j < m; j++) { rowi[j] -= ai * A[j]; // A[i][j] -= A[0][j]/A[0][0]*A[i][0] } diff --git a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx index d709e5b9af92d..3edd8f8f22e55 100644 --- a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx +++ b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx @@ -71,7 +71,7 @@ void ChebyshevFit1D::fit() mA[i * mN + j] = mA[j * mN + i]; } } - //print(); + // print(); { double* Ai = mA.data(); for (int32_t i = 0; i < mN; i++, Ai += mN) { @@ 
-88,7 +88,7 @@ void ChebyshevFit1D::fit() } mB[j] -= c * mB[i]; } - //print(); + // print(); } } { diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.h b/GPU/TPCFastTransformation/CorrectionMapsHelper.h index 32ff6e1f06b10..46070b36e63b2 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsHelper.h +++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.h @@ -165,25 +165,25 @@ class CorrectionMapsHelper MapRefBit = 0x2, LumiBit = 0x4, MapMShapeBit = 0x10 }; - bool mOwner = false; // is content of pointers owned by the helper + bool mOwner = false; // is content of pointers owned by the helper bool mLumiCTPAvailable = false; // is CTP Lumi available // these 2 are global options, must be set by the workflow global options int32_t mLumiScaleType = -1; // use CTP Lumi (1) or TPCScaler (2) for the correction scaling, 0 - no scaling int32_t mLumiScaleMode = -1; // scaling-mode of the correciton maps int32_t mUpdatedFlags = 0; - float mInstLumiCTP = 0.; // instanteneous luminosity from CTP (a.u) - float mInstLumi = 0.; // instanteneous luminosity (a.u) used for TPC corrections scaling - float mMeanLumi = 0.; // mean luminosity of the map (a.u) used for TPC corrections scaling - float mMeanLumiRef = 0.; // mean luminosity of the ref map (a.u) used for TPC corrections scaling reference - float mLumiScale = 0.; // precalculated mInstLumi/mMeanLumi - float mMeanLumiOverride = -1.f; // optional value to override mean lumi - float mMeanLumiRefOverride = -1.f; // optional value to override ref mean lumi - float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP - bool mEnableMShape = false; ///< use v shape correction - bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again - o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform - o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform - o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for 
v-shape distortions on A-side + float mInstLumiCTP = 0.; // instanteneous luminosity from CTP (a.u) + float mInstLumi = 0.; // instanteneous luminosity (a.u) used for TPC corrections scaling + float mMeanLumi = 0.; // mean luminosity of the map (a.u) used for TPC corrections scaling + float mMeanLumiRef = 0.; // mean luminosity of the ref map (a.u) used for TPC corrections scaling reference + float mLumiScale = 0.; // precalculated mInstLumi/mMeanLumi + float mMeanLumiOverride = -1.f; // optional value to override mean lumi + float mMeanLumiRefOverride = -1.f; // optional value to override ref mean lumi + float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP + bool mEnableMShape = false; ///< use v shape correction + bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again + o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform + o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform + o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for v-shape distortions on A-side ClassDefNV(CorrectionMapsHelper, 6); }; diff --git a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h index 506cd39b519af..e750bffd28f4b 100644 --- a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h +++ b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h @@ -48,13 +48,13 @@ struct NDPiecewisePolynomialContainer { /// for ROOT I/O NDPiecewisePolynomialContainer() = default; - const uint32_t mDim{}; ///< number of dimensions of the polynomial - const uint32_t mDegree{}; ///< degree of the polynomials - const std::vector mParams{}; ///< parameters of the polynomial - const bool mInteractionOnly{}; ///< consider only interaction terms - const std::vector mMin{}; ///< min vertices positions of the grid - const std::vector mMax{}; ///< max vertices positions of the grid - const std::vector mN{}; ///< number of 
vertices for each dimension + const uint32_t mDim{}; ///< number of dimensions of the polynomial + const uint32_t mDegree{}; ///< degree of the polynomials + const std::vector mParams{}; ///< parameters of the polynomial + const bool mInteractionOnly{}; ///< consider only interaction terms + const std::vector mMin{}; ///< min vertices positions of the grid + const std::vector mMax{}; ///< max vertices positions of the grid + const std::vector mN{}; ///< number of vertices for each dimension }; #endif diff --git a/GPU/TPCFastTransformation/Spline1DHelper.cxx b/GPU/TPCFastTransformation/Spline1DHelper.cxx index 938604bb9172d..9177c67d8b87b 100644 --- a/GPU/TPCFastTransformation/Spline1DHelper.cxx +++ b/GPU/TPCFastTransformation/Spline1DHelper.cxx @@ -606,7 +606,7 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) Spline1D spline2(spline1); spline1.approximateFunction(0., TMath::Pi(), F, nAuxiliaryPoints); - //if (itry == 0) + // if (itry == 0) { TFile outf("testSpline1D.root", "recreate"); if (outf.IsZombie()) { @@ -731,9 +731,9 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) } } // draw } - //delete canv; - //delete nt; - //delete knots; + // delete canv; + // delete nt; + // delete knots; statDf1 = sqrt(statDf1 / statN); statDf2 = sqrt(statDf2 / statN); diff --git a/GPU/TPCFastTransformation/Spline1DHelperOld.h b/GPU/TPCFastTransformation/Spline1DHelperOld.h index fc8d33ad64f87..971541e03258e 100644 --- a/GPU/TPCFastTransformation/Spline1DHelperOld.h +++ b/GPU/TPCFastTransformation/Spline1DHelperOld.h @@ -41,13 +41,13 @@ class Spline1DHelperOld /// \brief Helper structure for 1D spline construction /// struct DataPoint { - double u; ///< u coordinate - double cS0; ///< a coefficient for s0 - double cZ0; ///< a coefficient for s'0 - double cS1; ///< a coefficient for s1 - double cZ1; ///< a coefficient for s'1 + double u; ///< u coordinate + double cS0; ///< a coefficient for s0 + double cZ0; ///< a coefficient 
for s'0 + double cS1; ///< a coefficient for s1 + double cZ1; ///< a coefficient for s'1 int32_t iKnot; ///< index of the left knot of the segment - bool isKnot; ///< is the point placed at a knot + bool isKnot; ///< is the point placed at a knot }; /// _____________ Constructors / destructors __________________________ @@ -160,11 +160,11 @@ class Spline1DHelperOld /// helpers for the construction of 1D spline - Spline1D mSpline; ///< copy of the spline - int32_t mFdimensions; ///< n of F dimensions - std::vector mDataPoints; ///< measurement points + Spline1D mSpline; ///< copy of the spline + int32_t mFdimensions; ///< n of F dimensions + std::vector mDataPoints; ///< measurement points std::vector mKnotDataPoints; ///< which measurement points are at knots - std::vector mLSMmatrixFull; ///< a matrix to convert the measurements into the spline parameters with the LSM method + std::vector mLSMmatrixFull; ///< a matrix to convert the measurements into the spline parameters with the LSM method std::vector mLSMmatrixSderivatives; std::vector mLSMmatrixSvalues; diff --git a/GPU/TPCFastTransformation/Spline1DSpec.cxx b/GPU/TPCFastTransformation/Spline1DSpec.cxx index 603013d5e0808..0d33cdc88010a 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline1DSpec.cxx @@ -144,7 +144,7 @@ void Spline1DContainer::recreate(int32_t nYdim, int32_t numberOfKnots, co } } -#endif //GPUCA_GPUCODE +#endif // GPUCA_GPUCODE template void Spline1DContainer::print() const diff --git a/GPU/TPCFastTransformation/Spline1DSpec.h b/GPU/TPCFastTransformation/Spline1DSpec.h index 1ed1cc322ede3..6462f291d1136 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.h +++ b/GPU/TPCFastTransformation/Spline1DSpec.h @@ -211,13 +211,13 @@ class Spline1DContainer : public FlatObject /// _____________ Data members ____________ - int32_t mYdim = 0; ///< dimentionality of F - int32_t mNumberOfKnots = 0; ///< n knots on the grid - int32_t mUmax = 0; ///< U of the last knot 
- DataT mXmin = 0; ///< X of the first knot - DataT mXtoUscale = 0; ///< a scaling factor to convert X to U + int32_t mYdim = 0; ///< dimentionality of F + int32_t mNumberOfKnots = 0; ///< n knots on the grid + int32_t mUmax = 0; ///< U of the last knot + DataT mXmin = 0; ///< X of the first knot + DataT mXtoUscale = 0; ///< a scaling factor to convert X to U int32_t* mUtoKnotMap = nullptr; //! (transient!!) pointer to (integer U -> knot index) map inside the mFlatBufferPtr array - DataT* mParameters = nullptr; //! (transient!!) pointer to F-dependent parameters inside the mFlatBufferPtr array + DataT* mParameters = nullptr; //! (transient!!) pointer to F-dependent parameters inside the mFlatBufferPtr array ClassDefNV(Spline1DContainer, 1); }; diff --git a/GPU/TPCFastTransformation/Spline2DSpec.cxx b/GPU/TPCFastTransformation/Spline2DSpec.cxx index 4571110bdedaa..055530b9314c2 100644 --- a/GPU/TPCFastTransformation/Spline2DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline2DSpec.cxx @@ -66,7 +66,7 @@ void Spline2DContainer::setActualBufferAddress(char* actualFlatBufferPtr) mParameters = nullptr; parametersOffset = alignSize(u2Offset + mGridX2.getFlatBufferSize(), getParameterAlignmentBytes()); - //bufferSize = parametersOffset + getSizeOfParameters(); + // bufferSize = parametersOffset + getSizeOfParameters(); mParameters = reinterpret_cast(mFlatBufferPtr + parametersOffset); mGridX1.setActualBufferAddress(mFlatBufferPtr); diff --git a/GPU/TPCFastTransformation/SplineHelper.cxx b/GPU/TPCFastTransformation/SplineHelper.cxx index b0d1f4348ca60..6e1b53510e0d0 100644 --- a/GPU/TPCFastTransformation/SplineHelper.cxx +++ b/GPU/TPCFastTransformation/SplineHelper.cxx @@ -64,8 +64,8 @@ int32_t SplineHelper::pointstoarray(const int32_t indices[], const int32_ } //////////////// -//arraytopoints -// HILFSFUNKTION +// arraytopoints +// HILFSFUNKTION template int32_t SplineHelper::arraytopoints(int32_t point, int32_t result[], const int32_t numbers[], int32_t dim) { @@ -133,8 
+133,8 @@ void SplineHelper::approximateFunction( } // end for all DataPoints d // END MY VERSION - //std::vector dataPointF(getNumberOfDataPoints() * mFdimensions); - //DUMYY VERSION Commented out + // std::vector dataPointF(getNumberOfDataPoints() * mFdimensions); + // DUMYY VERSION Commented out /* for (int32_t i = 0; i < getNumberOfDataPoints() * mFdimensions; i++) { dataPointF[i] = 1.; } */ @@ -250,11 +250,11 @@ void SplineHelper::approximateFunction( // TO BE REMOVED TEST: // LOG(info) << "number of paramtertypes per knot : " << numberOfParameterTypes << ", "; - std::unique_ptr allParameters[numberOfParameterTypes]; //Array for the different parametertypes s, s'u, s'v, s''uv,... + std::unique_ptr allParameters[numberOfParameterTypes]; // Array for the different parametertypes s, s'u, s'v, s''uv,... for (int32_t i = 0; i < numberOfParameterTypes; i++) { - allParameters[i] = std::unique_ptr(new double[numberOfAllDataPoints * mFdimensions]); //To-Do:Fdim!! + allParameters[i] = std::unique_ptr(new double[numberOfAllDataPoints * mFdimensions]); // To-Do:Fdim!! } - //filling allParameters[0] and FParameters with s: + // filling allParameters[0] and FParameters with s: for (int32_t i = 0; i < numberOfAllDataPoints; i++) { for (int32_t f = 0; f < mFdimensions; f++) { // for all f-dimensions allParameters[0][i * mFdimensions + f] = DataPointF[i * mFdimensions + f]; // TO DO - Just get the pointer adress there PLEASE! 
@@ -273,24 +273,24 @@ void SplineHelper::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate KNotindices for all dimensions // WORKAROUND Getting Knotindices: knotindices[j] = p0indices[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(p0indices[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(p0indices[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } // get the knotindexvalue for FParameters: int32_t knotind = pointstoarray(knotindices, numberOfKnots, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { // for all f-dimensions get function values into Fparameters - Fparameters[knotind * numberOfParameterTypes * mFdimensions + f] = DataPointF[i * mFdimensions + f]; ///write derivatives in FParameters + Fparameters[knotind * numberOfParameterTypes * mFdimensions + f] = DataPointF[i * mFdimensions + f]; /// write derivatives in FParameters } } // end if isKnot } // end i (filling DataPointF Values into allParameters[0] and FParameters) // now: allParameters[0] = dataPointF; - //Array for input DataPointF-values for Spline1D::approximateFunctionGradually(...); + // Array for input DataPointF-values for Spline1D::approximateFunctionGradually(...); std::unique_ptr dataPointF1D[mXdimensions]; for (int32_t i = 0; i < mXdimensions; i++) { dataPointF1D[i] = std::unique_ptr(new double[numberOfDataPoints[i] * mFdimensions]); // To-Do:Fdim!! For s and derivetives at all knots. 
} - //Array to be filled by Spline1D::approximateFunctionGradually(...); + // Array to be filled by Spline1D::approximateFunctionGradually(...); std::unique_ptr par[mXdimensions]; std::unique_ptr parD[mXdimensions]; @@ -301,7 +301,7 @@ void SplineHelper::approximateFunction( // LOG(info) << "NumberOfParameters: " << mNumberOfParameters ; - //STARTING MAIN-LOOP, for all Parametertypes: + // STARTING MAIN-LOOP, for all Parametertypes: for (int32_t p = 1; p < numberOfParameterTypes; p++) { // p = 1!! Wir kriegen s (p0) durch approximateFunction()oben int32_t dimension = 0; // find the dimension for approximation for (int32_t i = (int32_t)(log2f((float)p)); i >= 0; i--) { @@ -366,9 +366,9 @@ void SplineHelper::approximateFunction( for (int32_t i = 0; i < mXdimensions; i++) { redistributionindex[i] = startpoint[i]; } - //redistributing the derivatives at dimension-Knots into array p + // redistributing the derivatives at dimension-Knots into array p for (int32_t i = 0; i < numberOfKnots[dimension]; i++) { // for all dimension-Knots - redistributionindex[dimension] = mHelpers[dimension].getKnotDataPoint(i); //find the indices + redistributionindex[dimension] = mHelpers[dimension].getKnotDataPoint(i); // find the indices int32_t finalposition = pointstoarray(redistributionindex, numberOfDataPoints, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { @@ -380,7 +380,7 @@ void SplineHelper::approximateFunction( if (!mHelpers[j].getDataPoint(redistributionindex[j]).isKnot) { isKnot = 0; break; - } //noch mal checken!! Das muss noch anders!! + } // noch mal checken!! Das muss noch anders!! 
} if (isKnot) { // for all knots @@ -388,20 +388,20 @@ void SplineHelper::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate Knotindices for all dimensions knotindices[j] = redistributionindex[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } // get the knotindexvalue for FParameters: int32_t knotind = pointstoarray(knotindices, numberOfKnots, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { - Fparameters[knotind * numberOfParameterTypes * mFdimensions + p * mFdimensions + f] = par[dimension][2 * i * mFdimensions + mFdimensions + f]; ///write derivatives in FParameters + Fparameters[knotind * numberOfParameterTypes * mFdimensions + p * mFdimensions + f] = par[dimension][2 * i * mFdimensions + mFdimensions + f]; /// write derivatives in FParameters } } } // end for all fknots (for redistribution) // recalculation: for (int32_t i = 0; i < numberOfDataPoints[dimension]; i++) { // this is somehow still redundant// TO DO: ONLY PART OF approximateFunction WHERE NDIM is considerd!! - redistributionindex[dimension] = i; // getting current datapointindices - bool isKnot = 1; // check is current datapoint a knot? + redistributionindex[dimension] = i; // getting current datapointindices + bool isKnot = 1; // check is current datapoint a knot? 
for (int32_t j = 0; j < mXdimensions; j++) { if (!mHelpers[j].getDataPoint(redistributionindex[j]).isKnot) { isKnot = 0; @@ -410,7 +410,7 @@ void SplineHelper::approximateFunction( } double splineF[mFdimensions]; double u = mHelpers[dimension].getDataPoint(i).u; - mHelpers[dimension].getSpline().interpolateU(mFdimensions, parD[dimension].get(), u, splineF); //recalculate at all datapoints of dimension + mHelpers[dimension].getSpline().interpolateU(mFdimensions, parD[dimension].get(), u, splineF); // recalculate at all datapoints of dimension for (int32_t dim = 0; dim < mFdimensions; dim++) { // writing it in allParameters // LOG(info)<::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate KNotindices for all dimensions knotindices[j] = redistributionindex[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } int32_t currentknotarrayindex = pointstoarray(knotindices, numberOfKnots, mXdimensions); // getting the recalculated value into FParameters: @@ -433,7 +433,7 @@ void SplineHelper::approximateFunction( } // end recalculation } // end of all1DSplines } // end of for parametertypes -} //end of approxymateFunction MYVERSION! +} // end of approxymateFunction MYVERSION! 
template int32_t SplineHelper::test(const bool draw, const bool drawDataPoints) diff --git a/GPU/TPCFastTransformation/SplineHelper.h b/GPU/TPCFastTransformation/SplineHelper.h index 986297e368aab..8c99e8113864f 100644 --- a/GPU/TPCFastTransformation/SplineHelper.h +++ b/GPU/TPCFastTransformation/SplineHelper.h @@ -106,9 +106,9 @@ class SplineHelper /// Stores an error message int32_t storeError(Int_t code, const char* msg); - TString mError = ""; ///< error string - int32_t mXdimensions; ///< number of X dimensions - int32_t mFdimensions; ///< number of F dimensions + TString mError = ""; ///< error string + int32_t mXdimensions; ///< number of X dimensions + int32_t mFdimensions; ///< number of F dimensions int32_t mNumberOfParameters; ///< number of parameters int32_t mNumberOfDataPoints; ///< number of data points std::vector> mHelpers; diff --git a/GPU/TPCFastTransformation/SplineSpec.h b/GPU/TPCFastTransformation/SplineSpec.h index dae17b22f42ea..1af427dee503b 100644 --- a/GPU/TPCFastTransformation/SplineSpec.h +++ b/GPU/TPCFastTransformation/SplineSpec.h @@ -299,7 +299,7 @@ class SplineSpec : public SplineContainer DataT iParameters[(1 << (2 * maxXdim)) * maxYdim]; // Array for all parameters - //get the indices of the "most left" Knot: + // get the indices of the "most left" Knot: int32_t indices[maxXdim]; // indices of the 'most left' knot for (int32_t i = 0; i < nXdim; i++) { @@ -309,7 +309,7 @@ class SplineSpec : public SplineContainer int32_t indicestmp[maxXdim]; for (int32_t i = 0; i < nKnotParametersPerY; i++) { // for every necessary Knot for (int32_t k = 0; k < nXdim; k++) { - indicestmp[k] = indices[k] + (i / (1 << k)) % 2; //get the knot-indices in every dimension (mirrored order binary counting) + indicestmp[k] = indices[k] + (i / (1 << k)) % 2; // get the knot-indices in every dimension (mirrored order binary counting) } int32_t index = TBase::getKnotIndex(indicestmp); // get index of the current Knot @@ -317,7 +317,7 @@ class SplineSpec : 
public SplineContainer iParameters[i * nKnotParameters + j] = Parameters[index * nKnotParameters + j]; } } - //now start with the interpolation loop: + // now start with the interpolation loop: constexpr auto maxInterpolations = (1 << (2 * maxXdim - 2)) * maxYdim; @@ -329,10 +329,10 @@ class SplineSpec : public SplineContainer int32_t nInterpolations = (1 << (2 * nXdim - 2)) * nYdim; int32_t nKnots = 1 << (nXdim); - for (int32_t d = 0; d < nXdim; d++) { // for every dimension - DataT* pointer[4] = {S0, D0, S1, D1}; // pointers for interpolation arrays S0, D0, S1, D1 point to Arraystart - for (int32_t i = 0; i < nKnots; i++) { // for every knot - for (int32_t j = 0; j < nKnots; j++) { // for every parametertype + for (int32_t d = 0; d < nXdim; d++) { // for every dimension + DataT* pointer[4] = {S0, D0, S1, D1}; // pointers for interpolation arrays S0, D0, S1, D1 point to Arraystart + for (int32_t i = 0; i < nKnots; i++) { // for every knot + for (int32_t j = 0; j < nKnots; j++) { // for every parametertype int32_t pointernr = 2 * (i % 2) + (j % 2); // to which array should it be delivered for (int32_t k = 0; k < nYdim; k++) { pointer[pointernr][0] = iParameters[(i * nKnots + j) * nYdim + k]; diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h index c353f3f3329e7..9589ecbfc1fc4 100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h @@ -43,7 +43,7 @@ class TPCFastSpaceChargeCorrection : public FlatObject /// \brief The struct contains necessary info for TPC padrow /// struct RowInfo { - int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) + int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) size_t dataOffsetBytes[3]{0}; ///< offset for the spline data withing a TPC slice ClassDefNV(RowInfo, 1); }; diff --git 
a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx index 27500d12d9d5d..7eea34c19ec25 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx @@ -428,7 +428,7 @@ double IrregularSpline2D3DCalibrator::getIntegralDeviationLine(const IrregularSp double d2 = dx * dx + dy * dy + dz * dz; sum += sqrt(d2 / 3.); } - //sum = sqrt(sum/3.); + // sum = sqrt(sum/3.); return sum; } diff --git a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h index 2398ff4cd1cbc..885b2e12eea0b 100644 --- a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h +++ b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h @@ -97,11 +97,11 @@ inline T RegularSpline1D::getSpline(const int32_t iknot1, T f0, T f1, T f2, T f3 /// The polynom is constructed with function values f0,f1,f2,f3 at knots {iknot0,iknot1,iknot2,iknot3} /// The u value supposed to be inside the [knot1,knot2] region, but also may be any. - ///f0 = f value at iknot1-1 - ///f1 = f value at iknot1 - ///f2 = f value at iknot1+1 - ///f3 = f value at iknot1+2 - ///u = u value where f(u) is searched for. + /// f0 = f value at iknot1-1 + /// f1 = f value at iknot1 + /// f2 = f value at iknot1+1 + /// f3 = f value at iknot1+2 + /// u = u value where f(u) is searched for. f0 -= f1; f2 -= f1; @@ -153,7 +153,7 @@ inline double RegularSpline1D::knotIndexToU(int32_t iknot) const inline int32_t RegularSpline1D::getKnotIndex(float u) const { - //index is just u elem [0, 1] * numberOfKnots and then floored. (so the "left" coordinate beside u gets chosen) + // index is just u elem [0, 1] * numberOfKnots and then floored. 
(so the "left" coordinate beside u gets chosen) int32_t index = (int32_t)(u * (mNumberOfKnots - 1)); if (index <= 1) { index = 1; diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx index 076e4ee0ed780..c030bae650414 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx @@ -110,7 +110,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int FlatObject::startConstruction(); - //construct regular grid for v + // construct regular grid for v mGridV.construct(numberOfRows); // For each x element numbersOfKnots may be a single RegularSpline1D with x knots. @@ -128,7 +128,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int // this is the space which is taken just by the RegularSpline1D's mDataIndexMapOffset = numberOfRows * sizeof(RegularSpline1D); - //The buffer size is the size of the array + // The buffer size is the size of the array FlatObject::finishConstruction(mDataIndexMapOffset + numberOfRows * sizeof(int32_t)); // Array for the 1D-Splines inside the buffer @@ -146,7 +146,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int numberOfKnots += knotsU; } - //save the numberOfRows and numberOfKnots + // save the numberOfRows and numberOfKnots mNumberOfRows = numberOfRows; mNumberOfKnots = numberOfKnots; @@ -156,7 +156,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int // this will count the amount of u-knots "under" a v-coordinate int32_t uSum = 0; - //count the amount of knots which are in gridU's lower than i + // count the amount of knots which are in gridU's lower than i for (int32_t dv = 0; dv < mNumberOfRows; dv++) { dataIndexMap[dv] = uSum; uSum += numbersOfKnots[dv]; diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h 
b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h index 4da954c8096ac..954738fa74f1b 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h @@ -122,7 +122,7 @@ class SemiregularSpline2D3D : public FlatObject const RegularSpline1D& getGridV() const { return mGridV; } /// Get 1-D grid for V coordinate - //const RegularSpline1D& getGridV() const { return mGridV; } + // const RegularSpline1D& getGridV() const { return mGridV; } const RegularSpline1D& getGridU(const int32_t i) const { return getSplineArray()[i]; } /// Get u,v of i-th knot @@ -131,7 +131,7 @@ class SemiregularSpline2D3D : public FlatObject /// Get size of the mFlatBuffer data size_t getFlatBufferSize() const { return mFlatBufferSize; } - ///Gets the knot index which is the i-th knot in v-space and the j-th knot in u-space + /// Gets the knot index which is the i-th knot in v-space and the j-th knot in u-space int32_t getDataIndex(int32_t i, int32_t j) const; int32_t getDataIndex0(int32_t i, int32_t j) const; @@ -212,16 +212,16 @@ inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) // the searched u-v-coordinates have to be in this spline. 
if (iKnot <= nk - 1) { - //in that case v is the current index + // in that case v is the current index v = mGridV.knotIndexToU(i); - //and u the coordinate of the given index + // and u the coordinate of the given index u = gridU.knotIndexToU(iKnot); break; } - //if iKnot is greater than number of knots the searched u-v cannot be in the current gridU - //so we search for nk less indizes and continue with the next v-coordinate + // if iKnot is greater than number of knots the searched u-v cannot be in the current gridU + // so we search for nk less indizes and continue with the next v-coordinate iKnot -= nk; } } @@ -229,16 +229,16 @@ inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) template inline void SemiregularSpline2D3D::correctEdges(T* data) const { - //Regular v-Grid (vertical) + // Regular v-Grid (vertical) const RegularSpline1D& gridV = getGridV(); int32_t nv = mNumberOfRows; - //EIGENTLICH V VOR U!!! - //Wegen Splines aber U vor V + // EIGENTLICH V VOR U!!! + // Wegen Splines aber U vor V { // ==== left edge of U ==== - //loop through all gridUs + // loop through all gridUs for (int32_t iv = 1; iv < mNumberOfRows - 1; iv++) { T* f0 = data + getDataIndex(0, iv); T* f1 = f0 + 3; @@ -251,7 +251,7 @@ inline void SemiregularSpline2D3D::correctEdges(T* data) const } { // ==== right edge of U ==== - //loop through all gridUs + // loop through all gridUs for (int32_t iv = 1; iv < mNumberOfRows - 1; iv++) { const RegularSpline1D& gridU = getGridU(iv); int32_t nu = gridU.getNumberOfKnots(); @@ -270,8 +270,8 @@ inline void SemiregularSpline2D3D::correctEdges(T* data) const int32_t nu = gridU.getNumberOfKnots(); for (int32_t iu = 0; iu < nu; iu++) { - //f0 to f3 are the x,y,z values of 4 points in the grid along the v axis. - //Since there are no knots because of the irregularity you can get this by using the getSplineMethod. + // f0 to f3 are the x,y,z values of 4 points in the grid along the v axis. 
+ // Since there are no knots because of the irregularity you can get this by using the getSplineMethod. T* f0 = data + getDataIndex(iu, 0); float u = gridU.knotIndexToU(iu); @@ -387,7 +387,7 @@ inline void SemiregularSpline2D3D::getSpline(const T* correctedData, float u, fl dataVx[vxIndex + 2] = gridU.getSpline(ui, correctedData[dataOffset + 2], correctedData[dataOffset + 5], correctedData[dataOffset + 8], correctedData[dataOffset + 11], u); } - //return results + // return results x = mGridV.getSpline(iknotv, dataVx[0], dataVx[3], dataVx[6], dataVx[9], v); y = mGridV.getSpline(iknotv, dataVx[1], dataVx[4], dataVx[7], dataVx[10], v); z = mGridV.getSpline(iknotv, dataVx[2], dataVx[5], dataVx[8], dataVx[11], v); @@ -426,7 +426,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa */ - //workaround 1: + // workaround 1: int32_t vGridi = mGridV.getKnotIndex(v); float dataU[12]; @@ -464,7 +464,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa y = res[1]; z = res[2]; -//getSpline( correctedData, u, v, x, y, z ); +// getSpline( correctedData, u, v, x, y, z ); #else getSpline(correctedData, u, v, x, y, z); #endif diff --git a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx index a9c39e8528354..f77a55ffcc894 100644 --- a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx +++ b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx @@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(Polynomials5D_InteractionOnly) const int32_t nPar5D5DegInteraction = 32; // number of parameters const int32_t nDim = 5; // dimensions const int32_t nDegree = 5; // degree - const float abstolerance = 0.0001f; // abosulte difference between refernce to polynomial class + const float abstolerance = 0.0001f; // abosulte difference between refernce to polynomial class const bool interactionOnly = true; MultivariatePolynomial polCT; // compile time polynomial @@ 
-142,7 +142,7 @@ BOOST_AUTO_TEST_CASE(Piecewise_polynomials) const int32_t nPar5D5DegInteraction = 32; // number of parameters const int32_t nDim = 5; // dimensions const int32_t nDegree = 5; // degree - const bool interactionOnly = true; // consider only interaction terms + const bool interactionOnly = true; // consider only interaction terms // reference polynomial which will be approximated by the NDPiecewisePolynomials MultivariatePolynomial polCT; diff --git a/GPU/Utils/FlatObject.h b/GPU/Utils/FlatObject.h index eba81a2ba06a2..8e13a8dedb868 100644 --- a/GPU/Utils/FlatObject.h +++ b/GPU/Utils/FlatObject.h @@ -28,7 +28,7 @@ #include "GPUCommonRtypes.h" #include "GPUCommonLogger.h" -//#define GPUCA_GPUCODE // uncomment to test "GPU" mode +// #define GPUCA_GPUCODE // uncomment to test "GPU" mode namespace o2 { @@ -319,10 +319,10 @@ class FlatObject InProgress = 0x2 ///< construction started: temporary memory is reserved }; - int32_t mFlatBufferSize = 0; ///< size of the flat buffer - uint32_t mConstructionMask = ConstructionState::NotConstructed; ///< mask for constructed object members, first two bytes are used by this class - char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer - char* mFlatBufferPtr = nullptr; //! Pointer to the flat buffer + int32_t mFlatBufferSize = 0; ///< size of the flat buffer + uint32_t mConstructionMask = ConstructionState::NotConstructed; ///< mask for constructed object members, first two bytes are used by this class + char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer + char* mFlatBufferPtr = nullptr; //! 
Pointer to the flat buffer ClassDefNV(FlatObject, 1); }; @@ -569,7 +569,7 @@ inline void FlatObject::setFutureBufferAddress(char* futureFlatBufferPtr) mFlatBufferContainer = nullptr; } -#endif //GPUCA_GPUCODE_DEVICE +#endif // GPUCA_GPUCODE_DEVICE } // namespace gpu } // namespace o2 diff --git a/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx b/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx index 52c3421fa8eb5..a9c9b78e9847e 100644 --- a/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx +++ b/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx @@ -51,7 +51,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.itsClusterMC = ITSClsLabels; } } - //LOG(info) << "Got " << ioPtr.nItsClusters << " ITS Clusters"; + // LOG(info) << "Got " << ioPtr.nItsClusters << " ITS Clusters"; } if (maskTrk[GID::ITS] && ioPtr.nItsTracks == 0) { const auto& ITSTracksArray = recoCont.getITSTracks(); @@ -68,7 +68,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.itsTrackMC = ITSTrkLabels.data(); } } - //LOG(info) << "Got " << ioPtr.nItsTracks << " ITS Tracks"; + // LOG(info) << "Got " << ioPtr.nItsTracks << " ITS Tracks"; } if (maskTrk[GID::ITSTPC] && ioPtr.nTracksTPCITSO2 == 0) { @@ -77,7 +77,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTracksTPCITSO2 = trkITSTPC.size(); ioPtr.tracksTPCITSO2 = trkITSTPC.data(); } - //LOG(info) << "Got " << ioPtr.nTracksTPCITSO2 << " ITS-TPC Tracks"; + // LOG(info) << "Got " << ioPtr.nTracksTPCITSO2 << " ITS-TPC Tracks"; } if (maskCl[GID::TOF] && ioPtr.nTOFClusters == 0) { @@ -86,7 +86,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTOFClusters = tofClusters.size(); ioPtr.tofClusters = tofClusters.data(); } - //LOG(info) << "Got " << ioPtr.nTOFClusters << " TOF Clusters"; + // LOG(info) << "Got " << ioPtr.nTOFClusters << " TOF Clusters"; } if ((maskMatch[GID::TOF] || maskMatch[GID::ITSTPCTOF]) && ioPtr.nITSTPCTOFMatches == 0) { @@ -95,7 +95,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nITSTPCTOFMatches = itstpctofMatches.size(); ioPtr.itstpctofMatches = 
itstpctofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nITSTPCTOFMatches << " ITS-TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nITSTPCTOFMatches << " ITS-TPC-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::ITSTPCTRDTOF]) && ioPtr.nITSTPCTRDTOFMatches == 0) { @@ -104,7 +104,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nITSTPCTRDTOFMatches = itstpctrdtofMatches.size(); ioPtr.itstpctrdtofMatches = itstpctrdtofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nITSTPCTRDTOFMatches << " ITS-TPC-TRD-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nITSTPCTRDTOFMatches << " ITS-TPC-TRD-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::TPCTOF]) && ioPtr.nTPCTOFMatches == 0) { @@ -113,7 +113,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTPCTOFMatches = tpctofMatches.size(); ioPtr.tpctofMatches = tpctofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::TPCTRDTOF]) && ioPtr.nTPCTRDTOFMatches == 0) { @@ -122,12 +122,12 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTPCTRDTOFMatches = tpctrdtofMatches.size(); ioPtr.tpctrdtofMatches = tpctrdtofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; } if (maskCl[GID::TRD]) { recoCont.inputsTRD->fillGPUIOPtr(&ioPtr); - //LOG(info) << "Got " << ioPtr.nTRDTracklets << " TRD Tracklets"; + // LOG(info) << "Got " << ioPtr.nTRDTracklets << " TRD Tracklets"; } if (maskTrk[GID::ITSTPCTRD] && ioPtr.nTRDTracksITSTPCTRD == 0) { @@ -136,7 +136,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTRDTracksITSTPCTRD = trdTracks.size(); ioPtr.trdTracksITSTPCTRD = trdTracks.data(); } - //LOG(info) << "Got " << ioPtr.nTRDTracksITSTPCTRD << " ITS-TPC-TRD Tracks"; + // LOG(info) << "Got " << ioPtr.nTRDTracksITSTPCTRD << " ITS-TPC-TRD Tracks"; } if 
(maskTrk[GID::TPCTRD] && ioPtr.nTRDTracksTPCTRD == 0) { @@ -145,12 +145,12 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTRDTracksTPCTRD = trdTracks.size(); ioPtr.trdTracksTPCTRD = trdTracks.data(); } - //LOG(info) << "Got " << ioPtr.nTRDTracksTPCTRD << " TPC-TRD Tracks"; + // LOG(info) << "Got " << ioPtr.nTRDTracksTPCTRD << " TPC-TRD Tracks"; } if (maskCl[GID::TPC] && ioPtr.clustersNative == nullptr) { ioPtr.clustersNative = &recoCont.getTPCClusters(); - //LOG(info) << "Got " << ioPtr.clustersNative->nClustersTotal << " TPC Clusters"; + // LOG(info) << "Got " << ioPtr.clustersNative->nClustersTotal << " TPC Clusters"; } if (maskTrk[GID::TPC] && ioPtr.nOutputTracksTPCO2 == 0) { @@ -176,7 +176,7 @@ std::shared_ptr GPUWorkflowHelper::fi retVal->tpcLinkTRD.resize(ioPtr.nOutputTracksTPCO2, -1); ioPtr.tpcLinkTRD = retVal->tpcLinkTRD.data(); } - //LOG(info) << "Got " << ioPtr.nOutputTracksTPCO2 << " TPC Tracks"; + // LOG(info) << "Got " << ioPtr.nOutputTracksTPCO2 << " TPC Tracks"; } auto creator = [maskTrk, &ioPtr, &recoCont, &retVal](auto& trk, GID gid, float time, float) { From 1838f8fa4746d146815b9e3855e3ebcc69802ab6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 22:34:57 +0100 Subject: [PATCH 0075/1764] GPU dpl-worflow: Remove HSA_NO_SCRATCH_RECLAIM=1 optimization for MI50, with latest ROCm actually decreases performance... --- prodtests/full-system-test/dpl-workflow.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 76235d127037a..5259bbf951d73 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -259,7 +259,6 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? 
($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi - [[ ${EPN_NODE_MI100:-} != "1" ]] && export HSA_NO_SCRATCH_RECLAIM=1 [[ $EPNSYNCMODE == 1 || ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else From 07cdaf8b104c494953904574f06fd3a12bcac879 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 09:09:14 +0100 Subject: [PATCH 0076/1764] GPU HIP: #define no longer needed, rocthrust does it internally --- GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h index c7a1b4f55e501..5506e3925bf80 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h @@ -15,8 +15,6 @@ #ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDES_H #define O2_GPU_RECONSTRUCTIONHIPINCLUDES_H -#define __HIP_ENABLE_DEVICE_MALLOC__ 1 // Fix SWDEV-239120 - #include #include #include From e4d7d36915b667590cb11139a4d972fbb2df9e1b Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 17 Mar 2025 10:55:51 +0100 Subject: [PATCH 0077/1764] ITS-GPU: remove debug options in production --- Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt index e2fc1f1388ad0..3cdb107e07438 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt @@ -13,7 +13,7 @@ if(CUDA_ENABLED) find_package(CUDAToolkit) message(STATUS "Building ITS CUDA tracker") -add_compile_options(-O0 -g -lineinfo -fPIC) +# add_compile_options(-O0 -g -lineinfo 
-fPIC) # add_compile_definitions(ITS_MEASURE_GPU_TIME) o2_add_library(ITStrackingCUDA SOURCES ClusterLinesGPU.cu From 203973d51038cd46734b6b8ce3a232b5ff913753 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 17 Mar 2025 12:05:52 +0100 Subject: [PATCH 0078/1764] Drop unneeded minimum requirement (#14075) --- Framework/Foundation/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Framework/Foundation/CMakeLists.txt b/Framework/Foundation/CMakeLists.txt index 65a43073ef9d8..dc6d7238c60ac 100644 --- a/Framework/Foundation/CMakeLists.txt +++ b/Framework/Foundation/CMakeLists.txt @@ -8,8 +8,6 @@ # In applying this license CERN does not waive the privileges and immunities # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -cmake_minimum_required(VERSION 3.5) - install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/include/Framework DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) From 900e6245e8a977bd9dfc0d96df1fbd5b3fd15850 Mon Sep 17 00:00:00 2001 From: pillot Date: Sun, 16 Mar 2025 13:49:31 +0100 Subject: [PATCH 0079/1764] fix compiler warning about printf format --- .../MCH/Conditions/src/scan-hvlv-ccdb.cxx | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx index 307759c97a0c3..236effc4b1182 100644 --- a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx @@ -56,6 +56,7 @@ using DPBMAP = std::map; using ISSUE = std::tuple; using ISSUELIST = std::vector; using ISSUEMAP = std::map; +using ULL = unsigned long long; //---------------------------------------------------------------------------- bool containsAKey(std::string data, const std::set& Keys) @@ -238,14 +239,14 @@ void checkRunBoundaries(const RBMAP& runBoundaries) for (const auto& [run, boundaries] : 
runBoundaries) { if (boundaries.second <= boundaries.first) { printf("error: run %d EOR <= SOR: %llu - %llu (%s - %s)\n", - run, boundaries.first, boundaries.second, + run, (ULL)boundaries.first, (ULL)boundaries.second, getTime(boundaries.first).c_str(), getTime(boundaries.second).c_str()); error = true; } if (boundaries.first <= endOfPreviousRun) { printf("error: SOR run %d <= EOR run %d: %llu (%s) <= %llu (%s)\n", - run, previousRun, boundaries.first, getTime(boundaries.first).c_str(), - endOfPreviousRun, getTime(endOfPreviousRun).c_str()); + run, previousRun, (ULL)boundaries.first, getTime(boundaries.first).c_str(), + (ULL)endOfPreviousRun, getTime(endOfPreviousRun).c_str()); error = true; } previousRun = run; @@ -266,7 +267,7 @@ void printRunBoundaries(const RBMAP& runBoundaries) printf("------------------------------------\n"); for (const auto& [run, boundaries] : runBoundaries) { - printf("%d: %llu - %llu (%s - %s)\n", run, boundaries.first, boundaries.second, + printf("%d: %llu - %llu (%s - %s)\n", run, (ULL)boundaries.first, (ULL)boundaries.second, getTime(boundaries.first).c_str(), getTime(boundaries.second).c_str()); } @@ -324,7 +325,7 @@ DPBMAP getDPBoundaries(ccdb::CcdbApi const& api, std::string what, if (dpBoundaries.empty()) { printf("\e[0;31merror: no file found in %s in time range %llu - %llu (%s - %s) --> use the default one\e[0m\n", - what.c_str(), tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + what.c_str(), (ULL)tStart, (ULL)tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); dpBoundaries.emplace(1, 9999999999999); } @@ -340,13 +341,13 @@ void checkDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t tStart, if (dpBoundaries.begin()->first > tStart) { printf("error: the beginning of the time range is not covered: %llu > %llu (%s > %s)\n", - dpBoundaries.begin()->first, tStart, + (ULL)dpBoundaries.begin()->first, (ULL)tStart, getTime(dpBoundaries.begin()->first).c_str(), getTime(tStart).c_str()); error = true; } 
if (dpBoundaries.rbegin()->second < tStop) { printf("error: the end of the time range is not covered: %llu < %llu (%s < %s)\n", - dpBoundaries.rbegin()->second, tStop, + (ULL)dpBoundaries.rbegin()->second, (ULL)tStop, getTime(dpBoundaries.rbegin()->second).c_str(), getTime(tStop).c_str()); error = true; } @@ -355,13 +356,13 @@ void checkDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t tStart, for (auto [tStart, tStop] : dpBoundaries) { if (tStop <= tStart) { printf("error: EOF <= SOF: %llu - %llu (%s - %s)\n", - tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + (ULL)tStart, (ULL)tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); error = true; } if (tStart != previousTStop) { printf("error: end of %s file != start of next %s file: %llu (%s) != %llu (%s))\n", scanHV ? "HV" : "LV", scanHV ? "HV" : "LV", - previousTStop, getTime(previousTStop).c_str(), tStart, getTime(tStart).c_str()); + (ULL)previousTStop, getTime(previousTStop).c_str(), (ULL)tStart, getTime(tStart).c_str()); error = true; } previousTStop = tStop; @@ -381,10 +382,10 @@ void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t timeInt printf("------------------------------------\n"); for (auto [tStart, tStop] : dpBoundaries) { - printf("%llu - %llu (%s - %s)", tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + printf("%llu - %llu (%s - %s)", (ULL)tStart, (ULL)tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); if (tStop - tStart < 60000 * (timeInterval - 1) || tStop - tStart > 60000 * (timeInterval + 1)) { printf("\e[0;31m ! 
warning: validity range %s != %llu±1 min\e[0m\n", - getDuration(tStart, tStop).c_str(), timeInterval); + getDuration(tStart, tStop).c_str(), (ULL)timeInterval); } else { printf("\n"); } @@ -478,14 +479,15 @@ void fillDataPoints(const std::vector& dps, std::map& d auto previousTS = dps2.rbegin()->first; if (ts != previousTS || getValue(*itDP) != dps2.rbegin()->second) { if (ts <= previousTS) { - printf("error: wrong data point order (%llu <= %llu)\n", ts, previousTS); + printf("error: wrong data point order (%llu <= %llu)\n", (ULL)ts, (ULL)previousTS); exit(1); } if (printWarning) { printf("%s%s missing the previous data point (dt = %s%llu ms)", color.c_str(), header.c_str(), - (previousTS < tMin) ? "-" : "+", (previousTS < tMin) ? tMin - previousTS : previousTS - tMin); + (previousTS < tMin) ? "-" : "+", + (ULL)((previousTS < tMin) ? tMin - previousTS : previousTS - tMin)); if (ts <= tMin) { - printf(" but get one at dt = -%llu ms\e[0m\n", tMin - ts); + printf(" but get one at dt = -%llu ms\e[0m\n", (ULL)(tMin - ts)); } else { printf("\e[0m\n"); } @@ -496,11 +498,11 @@ void fillDataPoints(const std::vector& dps, std::map& d // add the first data point (should be before the start of validity of the file) if (ts >= tMax) { - printf("error: first data point exceeding file validity range (dt = +%llu ms)\n", ts - tMax); + printf("error: first data point exceeding file validity range (dt = +%llu ms)\n", (ULL)(ts - tMax)); exit(1); } else if (ts > tMin && printWarning) { printf("%s%s missing data point prior file start of validity (dt = +%llu ms)\e[0m\n", - color.c_str(), header.c_str(), ts - tMin); + color.c_str(), header.c_str(), (ULL)(ts - tMin)); header = " "; } dps2.emplace(ts, getValue(*itDP)); @@ -510,15 +512,15 @@ void fillDataPoints(const std::vector& dps, std::map& d for (++itDP; itDP < dps.end(); ++itDP) { ts = itDP->get_epoch_time(); if (ts <= previousTS) { - printf("error: wrong data point order (%llu <= %llu)\n", ts, previousTS); + printf("error: wrong data 
point order (%llu <= %llu)\n", (ULL)ts, (ULL)previousTS); exit(1); } if (ts < tMin && (warningLevel > 1 || (warningLevel == 1 && ts + tolerance < tMin))) { printf("%s%s data point outside of file validity range (dt = -%llu ms)\e[0m\n", - (ts + tolerance < tMin) ? "\e[0;31m" : "\e[0;34m", header.c_str(), tMin - ts); + (ts + tolerance < tMin) ? "\e[0;31m" : "\e[0;34m", header.c_str(), (ULL)(tMin - ts)); } else if (ts >= tMax && warningLevel >= 1) { printf("\e[0;31m%s data point outside of file validity range (dt = +%llu ms)\e[0m\n", - header.c_str(), ts - tMax); + header.c_str(), (ULL)(ts - tMax)); } dps2.emplace(ts, getValue(*itDP)); previousTS = ts; @@ -580,13 +582,13 @@ void printDataPoints(const DPMAP2 dpsMapsPerCh[10], std::string hvlvFormat, bool for (const auto& [alias, dps] : dpsMapsPerCh[ch]) { - printf("- %s: %lu values", alias.c_str(), dps.size()); + printf("- %s: %zu values", alias.c_str(), dps.size()); if (all) { printf("\n"); for (const auto& [ts, val] : dps) { - printf(format1.c_str(), ts, getTime(ts).c_str(), val); + printf(format1.c_str(), (ULL)ts, getTime(ts).c_str(), val); } } else if (!dps.empty()) { @@ -594,8 +596,8 @@ void printDataPoints(const DPMAP2 dpsMapsPerCh[10], std::string hvlvFormat, bool const auto firstdt = dps.begin(); const auto lastdt = dps.rbegin(); printf(format2.c_str(), - firstdt->first, getTime(firstdt->first).c_str(), firstdt->second, - lastdt->first, getTime(lastdt->first).c_str(), lastdt->second); + (ULL)firstdt->first, getTime(firstdt->first).c_str(), firstdt->second, + (ULL)lastdt->first, getTime(lastdt->first).c_str(), lastdt->second); } else { printf("\n"); @@ -719,7 +721,7 @@ void fillO2Issues(const std::vector& o2issues, // exclude issues fully outside of the DP file boudaries if (itIssue->end <= tMin || itIssue->begin >= tMax) { printf("\e[0;35mwarning: skipping O2 issue outside of file boundaries (%llu - %llu)\e[0m\n", - itIssue->begin, itIssue->end); + (ULL)itIssue->begin, (ULL)itIssue->end); continue; } @@ -727,14 
+729,14 @@ void fillO2Issues(const std::vector& o2issues, if (itIssue->begin < tMin - mch::StatusMapCreatorParam::Instance().timeMargin && (itIssue != o2issues.begin() || itIssue->begin != 0)) { printf("\e[0;35mwarning: O2 returns an issue with uncommon start time (%llu < %llu)\e[0m\n", - itIssue->begin, tMin - mch::StatusMapCreatorParam::Instance().timeMargin); + (ULL)itIssue->begin, (ULL)(tMin - mch::StatusMapCreatorParam::Instance().timeMargin)); } // only the last issue could in principle extend beyond the end of the DP file, to infinity if (itIssue->end >= tMax + mch::StatusMapCreatorParam::Instance().timeMargin && (itIssue != std::prev(o2issues.end()) || itIssue->end != std::numeric_limits::max())) { printf("\e[0;35mwarning: O2 returns an issue with uncommon end time (%llu >= %llu)\e[0m\n", - itIssue->end, tMax + mch::StatusMapCreatorParam::Instance().timeMargin); + (ULL)itIssue->end, (ULL)(tMax + mch::StatusMapCreatorParam::Instance().timeMargin)); } // extend the last issue in case of continuity accross the DP files or add a new one, @@ -897,7 +899,7 @@ void printIssues(const ISSUEMAP issuesPerCh[10], const ISSUEMAP o2IssuesPerCh[10 auto printIssue = [&format](ISSUE issue, std::string color) { const auto& [tStart, tStop, min, mean, runs] = issue; printf("%s", color.c_str()); - printf(format.c_str(), tStart, tStop, + printf(format.c_str(), (ULL)tStart, (ULL)tStop, getTime(tStart).c_str(), getDuration(tStart, tStop).c_str(), min, mean, runs.c_str()); printf("\e[0m"); }; From c39a87024ea2bd73db083972deae4c58ef8350ed Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Mon, 3 Feb 2025 10:32:02 +0100 Subject: [PATCH 0080/1764] Promoting critical Error logs to ILG Ops level --- Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx b/Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx index b625ab344e112..d7a7282a78684 100644 --- 
a/Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx +++ b/Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx @@ -77,7 +77,7 @@ class EPNMonitor std::unordered_map mFiles; std::string mPath; std::vector mFilters; - std::unordered_map> mMapRootLogTypes; + std::unordered_map> mMapLogTypes; volatile unsigned int mRunNumber; std::string mPartition; unsigned int nLines = 0; @@ -95,12 +95,11 @@ EPNMonitor::EPNMonitor(std::string path, bool infoLogger, int runNumber, std::st mFilters.emplace_back("^Warning in {InfoLogger::InfoLogger::Severity::Info, 13}); - mMapRootLogTypes.emplace("Print in <", std::pair{InfoLogger::InfoLogger::Severity::Info, 13}); - mMapRootLogTypes.emplace("Warning in <", std::pair{InfoLogger::InfoLogger::Severity::Warning, 11}); - mMapRootLogTypes.emplace("Error in <", std::pair{InfoLogger::InfoLogger::Severity::Error, 2}); - mMapRootLogTypes.emplace("Fatal in <", std::pair{InfoLogger::InfoLogger::Severity::Fatal, 1}); - mMapRootLogTypes.emplace("*** Break ***", std::pair{InfoLogger::InfoLogger::Severity::Fatal, 1}); + mMapLogTypes.emplace("(core dumped)", std::pair{InfoLogger::InfoLogger::Severity::Error, 1}); + mMapLogTypes.emplace("Warning in <", std::pair{InfoLogger::InfoLogger::Severity::Warning, 11}); + mMapLogTypes.emplace("Error in <", std::pair{InfoLogger::InfoLogger::Severity::Error, 2}); + mMapLogTypes.emplace("Fatal in <", std::pair{InfoLogger::InfoLogger::Severity::Fatal, 1}); + mMapLogTypes.emplace("*** Break ***", std::pair{InfoLogger::InfoLogger::Severity::Fatal, 1}); mInfoLoggerActive = infoLogger; mPath = path; mRunNumber = runNumber; @@ -214,7 +213,7 @@ void EPNMonitor::thread() // assign proper severity / level for remaining ROOT log messages auto severity{InfoLogger::InfoLogger::Severity::Error}; int level{3}; - for (const auto& logType : mMapRootLogTypes) { + for (const auto& logType : mMapLogTypes) { if (line.find(logType.first) != std::string::npos) { severity = std::get(logType.second); level = std::get(logType.second); From 
79ea4b7a5091d0613f7a5b49fd96e9e385c20b21 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Wed, 12 Feb 2025 15:31:14 +0100 Subject: [PATCH 0081/1764] Promoting DPL errors relevant to the ECS shifter to critical, leaving messages to be ignored at error --- Detectors/TPC/workflow/src/IDCToVectorSpec.cxx | 6 +++--- Framework/Core/src/CommonServices.cxx | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Detectors/TPC/workflow/src/IDCToVectorSpec.cxx b/Detectors/TPC/workflow/src/IDCToVectorSpec.cxx index 27dbcf5d85bbf..e9433c775a02a 100644 --- a/Detectors/TPC/workflow/src/IDCToVectorSpec.cxx +++ b/Detectors/TPC/workflow/src/IDCToVectorSpec.cxx @@ -405,7 +405,7 @@ class IDCToVectorDevice : public o2::framework::Task for (const auto& inf : infVec) { if (!inf.hasBothEPs()) { - LOGP(error, "IDC CRU {:3}: data missing at ({:8}, {:4}) for one or both end points {:02b} in TF {}", cru, inf.heartbeatOrbit, inf.heartbeatBC, inf.epSeen, tfCounter); + LOGP(critical, "IDC CRU {:3}: data missing at ({:8}, {:4}) for one or both end points {:02b} in TF {}", cru, inf.heartbeatOrbit, inf.heartbeatBC, inf.epSeen, tfCounter); hasErrors = true; } } @@ -417,12 +417,12 @@ class IDCToVectorDevice : public o2::framework::Task } if (packetsInTF != infVec.size()) { - LOGP(error, "IDC CRU {:3}: number of IDC packets {} does not match max over all CRUs {} in TF {}", cru, packetsInTF, infVec.size(), tfCounter); + LOGP(critical, "IDC CRU {:3}: number of IDC packets {} does not match max over all CRUs {} in TF {}", cru, packetsInTF, infVec.size(), tfCounter); hasErrors = true; } if (!std::equal(infVecComp->begin(), infVecComp->end(), infVec.begin())) { - LOGP(error, "IDC CRU {:3}: mismatch in orbit numbers", cru); + LOGP(critical, "IDC CRU {:3}: mismatch in orbit numbers", cru); hasErrors = true; } } diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index 95836adc02171..cc3c920bc7be1 100644 --- a/Framework/Core/src/CommonServices.cxx +++ 
b/Framework/Core/src/CommonServices.cxx @@ -579,7 +579,7 @@ auto decongestionCallbackOrdered = [](AsyncTask& task, size_t id) -> void { if (state.transitionHandling != TransitionHandlingState::NoTransition && DefaultsHelpers::onlineDeploymentMode()) { O2_SIGNPOST_EVENT_EMIT_WARN(async_queue, cid, "oldest_possible_timeslice", "Stop transition requested. Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); } else { - O2_SIGNPOST_EVENT_EMIT_ERROR(async_queue, cid, "oldest_possible_timeslice", "Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); + O2_SIGNPOST_EVENT_EMIT_CRITICAL(async_queue, cid, "oldest_possible_timeslice", "Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); } timesliceIndex.rescan(); } @@ -650,7 +650,7 @@ o2::framework::ServiceSpec if (state.transitionHandling != TransitionHandlingState::NoTransition && DefaultsHelpers::onlineDeploymentMode()) { O2_SIGNPOST_EVENT_EMIT_WARN(data_processor_context, cid, "oldest_possible_timeslice", "Stop transition requested. 
Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); } else { - O2_SIGNPOST_EVENT_EMIT_ERROR(data_processor_context, cid, "oldest_possible_timeslice", "Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); + O2_SIGNPOST_EVENT_EMIT_CRITICAL(data_processor_context, cid, "oldest_possible_timeslice", "Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); } timesliceIndex.rescan(); } From 74c2ec27488b2c0874884cb016b3d36771ffd7d9 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 17 Mar 2025 17:22:54 +0100 Subject: [PATCH 0082/1764] Bump nvToolsExt (#14074) --- Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt | 1 - Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt index 3cdb107e07438..531cf2b0dcd33 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt @@ -33,7 +33,6 @@ o2_add_library(ITStrackingCUDA O2::SimulationDataFormat O2::ReconstructionDataFormats O2::GPUCommon - CUDA::nvToolsExt PRIVATE_LINK_LIBRARIES O2::GPUTrackingCUDAExternalProvider TARGETVARNAME targetName) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu index 0bca6360d268c..7c42658242231 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu @@ -13,7 +13,7 @@ #include "ITStrackingGPU/TracerGPU.h" #if !defined(__HIPCC__) && defined(__USE_GPU_TRACER__) -#include "nvToolsExt.h" +#include constexpr uint32_t colors[] = {0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff}; constexpr int num_colors = sizeof(colors) / sizeof(uint32_t); From 64abf5c5e675edbef70cf21ae4a71b499246fb73 Mon Sep 17 00:00:00 
2001 From: David Rohr Date: Mon, 17 Mar 2025 15:16:10 +0100 Subject: [PATCH 0083/1764] GPU: Fix GPUChkErr macro, GPU::Common should not use GPU::GPUTracking internals --- GPU/Common/GPUCommonHelpers.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonHelpers.h b/GPU/Common/GPUCommonHelpers.h index ad876db0d6c3a..915d93c9bc791 100644 --- a/GPU/Common/GPUCommonHelpers.h +++ b/GPU/Common/GPUCommonHelpers.h @@ -33,6 +33,7 @@ #define GPUChkErrSI(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, false) #include "GPUCommonDef.h" +#include "GPUCommonLogger.h" #include namespace o2::gpu::internal @@ -43,7 +44,7 @@ extern int32_t GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)( inline int32_t GPUReconstructionCPUChkErr(const int64_t error, const char* file, int32_t line) { if (error) { - GPUError("GPUCommon Error Code %d (%s:%d)", error, file, line); + LOGF(error, "GPUCommon Error Code %ld (%s:%d)", (long)error, file, line); } return error != 0; } From 50bf6546f14e71cd62a01b634737af44c24bc265 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 09:23:17 +0100 Subject: [PATCH 0084/1764] GPU TPC CF: Fix rounding error in qMax cut --- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx | 5 +---- GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index b3b3c64095017..f876270a34358 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -98,10 +98,7 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, t if (cn.qTot <= param.rec.tpc.cfQTotCutoff) { return false; } - cn.qMax = q; - if (cn.qMax <= param.rec.tpc.cfQMaxCutoff) { - return false; - } + cn.qMax = q; // cfQMaxCutoff check already done at 
PeakFinder level if (mTimeMean < param.rec.tpc.clustersShiftTimebinsClusterizer) { return false; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx index 30fdac92e8607..1de922f716c14 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx @@ -42,7 +42,7 @@ GPUdii() bool GPUTPCCFPeakFinder::isPeak( { uint16_t ll = get_local_id(0); - bool belowThreshold = (q <= calib.tpc.cfQMaxCutoff); + bool belowThreshold = (uint32_t)q <= calib.tpc.cfQMaxCutoff; uint16_t lookForPeaks; uint16_t partId = CfUtils::partition( From 1e625e72171876fbb02e0e75c9c6b73a2c6e1ad5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 10:35:55 +0100 Subject: [PATCH 0085/1764] Fix CMake warnings --- Steer/CMakeLists.txt | 2 ++ run/CMakeLists.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Steer/CMakeLists.txt b/Steer/CMakeLists.txt index 70f50f4ab8823..8e2706d31bb0a 100644 --- a/Steer/CMakeLists.txt +++ b/Steer/CMakeLists.txt @@ -34,11 +34,13 @@ o2_target_root_dictionary(Steer include/Steer/MCKinematicsReader.h include/Steer/MaterialBudgetMap.h) o2_add_test(InteractionSampler + COMPONENT_NAME steer PUBLIC_LINK_LIBRARIES O2::SimulationDataFormat SOURCES test/testInteractionSampler.cxx LABELS steer) o2_add_test(HitProcessingManager + COMPONENT_NAME steer PUBLIC_LINK_LIBRARIES O2::Steer SOURCES test/testHitProcessingManager.cxx LABELS steer) diff --git a/run/CMakeLists.txt b/run/CMakeLists.txt index 662716901ed0a..fd43207f92d1e 100644 --- a/run/CMakeLists.txt +++ b/run/CMakeLists.txt @@ -229,6 +229,7 @@ set_property(TEST o2sim_G4 APPEND PROPERTY ENVIRONMENT ${G4ENV}) o2_add_test(CheckStackG4 + COMPONENT_NAME sim SOURCES checkStack.cxx NAME o2sim_checksimkinematics_G4 WORKING_DIRECTORY ${SIMTESTDIR} @@ -276,6 +277,7 @@ set_tests_properties(o2sim_G3 G3) o2_add_test(CheckStackG3 + COMPONENT_NAME sim SOURCES checkStack.cxx NAME 
o2sim_checksimkinematics_G3 WORKING_DIRECTORY ${SIMTESTDIR} From 40a883754789b3630fff732076136123a695ae3a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 10:49:01 +0100 Subject: [PATCH 0086/1764] GPU: Drop obsolete minimum version setting --- GPU/GPUTracking/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index d5a90dbd65ea3..5ce96d450f765 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -10,7 +10,6 @@ # or submit itself to any jurisdiction. set(MODULE GPUTracking) -cmake_minimum_required(VERSION 3.27 FATAL_ERROR) # set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0") # to uncomment if needed, tired of typing this... # set(GPUCA_BUILD_DEBUG 1) From 134f5ea21c5edd15c2836829b5ec629925342643 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 10:52:03 +0100 Subject: [PATCH 0087/1764] GPU TPC: Remove tpcGeometry instance, use constexpr GPUTPCGeometry:: --- .../study/src/SVStudy.cxx | 3 +- .../study/src/TrackingStudy.cxx | 3 +- .../TPC/workflow/src/EntropyEncoderSpec.cxx | 4 +-- GPU/GPUTracking/Base/GPUParam.cxx | 6 ++-- GPU/GPUTracking/Base/GPUParam.h | 2 -- GPU/GPUTracking/Base/GPUParam.inc | 19 ++++++------ .../Base/GPUReconstructionConvert.cxx | 27 +++++++++-------- .../GPUTPCCompressionKernels.cxx | 14 ++++----- .../TPCClusterDecompressionCore.inc | 18 ++++++----- GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx | 2 +- GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 13 ++++---- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 2 +- .../Merger/GPUTPCGMSectorTrack.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 30 +++++++++---------- .../SectorTracker/GPUTPCTrackingData.cxx | 5 ++-- .../GPUTPCTrackletConstructor.cxx | 6 ++-- .../TPCClusterFinder/ClusterAccumulator.cxx | 8 ++--- .../GPUTPCCFCheckPadBaseline.cxx | 3 +- 
.../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 7 +++-- .../GPUTPCNNClusterizerKernels.cxx | 19 ++++++------ .../GPUTPCNNClusterizerKernels.h | 4 +-- GPU/GPUTracking/dEdx/GPUdEdx.cxx | 1 - GPU/GPUTracking/dEdx/GPUdEdx.h | 21 ++++++------- .../display/render/GPUDisplayImportEvent.cxx | 4 +-- GPU/GPUTracking/qa/genEvents.cxx | 2 +- 26 files changed, 118 insertions(+), 109 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx index c28048a1f9503..1e141a29d3f55 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx @@ -48,6 +48,7 @@ // #include "GPUSettingsO2.h" #include "GPUParam.h" #include "GPUParam.inc" +#include "GPUTPCGeometry.h" #include "GPUO2InterfaceRefit.h" #include "GPUO2InterfaceUtils.h" @@ -254,7 +255,7 @@ o2::dataformats::V0Ext SVStudySpec::processV0(int iv, o2::globaltracking::RecoCo tpcTr.getClusterReference(clRefs, tpcTr.getNClusterReferences() - 1, clSect, clRow, clIdx); const auto& clus = recoData.getTPCClusters().clusters[clSect][clRow][clIdx]; prInfo.lowestRow = clRow; - int npads = mParam->tpcGeometry.NPads(clRow); + int npads = o2::gpu::GPUTPCGeometry::NPads(clRow); prInfo.padFromEdge = uint8_t(clus.getPad()); if (prInfo.padFromEdge > npads / 2) { prInfo.padFromEdge = npads - 1 - prInfo.padFromEdge; diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index db57ad5f8a7eb..f206c43f7f57a 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -47,6 +47,7 @@ #include "GPUO2Interface.h" // Needed for propper settings in GPUParam.h #include "GPUParam.h" #include "GPUParam.inc" +#include "GPUTPCGeometry.h" #include "Steer/MCKinematicsReader.h" #include "MathUtils/fit.h" #include @@ -301,7 +302,7 @@ void 
TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) trExt.rowMinTPC = clRow; const auto& clus = tpcClusAcc.clusters[clSect][clRow][clIdx]; trExt.padFromEdge = uint8_t(clus.getPad()); - int npads = mTPCRefitter->getParam()->tpcGeometry.NPads(clRow); + int npads = o2::gpu::GPUTPCGeometry::NPads(clRow); if (trExt.padFromEdge > npads / 2) { trExt.padFromEdge = npads - 1 - trExt.padFromEdge; } diff --git a/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx b/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx index cc964ade0d87c..2efa7077be125 100644 --- a/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx +++ b/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx @@ -230,9 +230,9 @@ void EntropyEncoderSpec::run(ProcessingContext& pc) int myThread = 0; #endif unsigned int count = 0; - const float x = mParam->tpcGeometry.Row2X(j); + const float x = GPUTPCGeometry::Row2X(j); auto checker = [i, j, firstIR, totalT, x, this, &preCl, &count, &outBuffer = tmpBuffer[myThread], &rejectHits, &clustersFiltered](const o2::tpc::ClusterNative& cl, unsigned int k) { - const float y = mParam->tpcGeometry.LinearPad2Y(i, j, cl.getPad()); + const float y = GPUTPCGeometry::LinearPad2Y(i, j, cl.getPad()); const float r = sqrtf(x * x + y * y); const float maxz = r * mEtaFactor + mMaxZ; const unsigned int deltaBC = std::max(0.f, totalT - mFastTransform->convDeltaZtoDeltaTimeInTimeFrameAbs(maxz)) * constants::LHCBCPERTIMEBIN; diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index 192e46c36dc68..bbca150df405a 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -21,6 +21,7 @@ #include "GPUDataTypes.h" #include "GPUConstantMem.h" #include "DetectorsBase/Propagator.h" +#include "GPUTPCGeometry.h" using namespace o2::gpu; @@ -32,7 +33,6 @@ using namespace o2::gpu; void GPUParam::SetDefaults(float solenoidBz) { memset((void*)this, 0, sizeof(*this)); - new (&tpcGeometry) GPUTPCGeometry; new (&rec) GPUSettingsRec; 
occupancyMap = nullptr; occupancyTotal = 0; @@ -178,8 +178,8 @@ void GPUParam::UpdateRun3ClusterErrors(const float* yErrorParam, const float* zE for (int32_t rowType = 0; rowType < 4; rowType++) { constexpr int32_t regionMap[4] = {0, 4, 6, 8}; ParamErrors[yz][rowType][0] = param[0] * param[0]; - ParamErrors[yz][rowType][1] = param[1] * param[1] * tpcGeometry.PadHeightByRegion(regionMap[rowType]); - ParamErrors[yz][rowType][2] = param[2] * param[2] / tpcGeometry.TPCLength() / tpcGeometry.PadHeightByRegion(regionMap[rowType]); + ParamErrors[yz][rowType][1] = param[1] * param[1] * GPUTPCGeometry::PadHeightByRegion(regionMap[rowType]); + ParamErrors[yz][rowType][2] = param[2] * param[2] / GPUTPCGeometry::TPCLength() / GPUTPCGeometry::PadHeightByRegion(regionMap[rowType]); ParamErrors[yz][rowType][3] = param[3] * param[3] * rec.tpc.clusterErrorOccupancyScaler * rec.tpc.clusterErrorOccupancyScaler; } } diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index fbce6246de112..4b77628c88775 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -19,7 +19,6 @@ #include "GPUCommonMath.h" #include "GPUDef.h" #include "GPUSettings.h" -#include "GPUTPCGeometry.h" #include "GPUTPCGMPolynomialField.h" #if !defined(GPUCA_GPUCODE) @@ -59,7 +58,6 @@ struct GPUParam_t { int32_t continuousMaxTimeBin; int32_t tpcCutTimeBin; - GPUTPCGeometry tpcGeometry; // TPC Geometry GPUTPCGMPolynomialField polynomialField; // Polynomial approx. 
of magnetic field for TPC GM const uint32_t* occupancyMap; // Ptr to TPC occupancy map uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 19dc1fc4a3578..a118a8f639fe9 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -18,6 +18,7 @@ #include "GPUParam.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCClusterOccupancyMap.h" +#include "GPUTPCGeometry.h" namespace o2::gpu { @@ -42,14 +43,14 @@ GPUdi() void GPUParam::Global2Sector(int32_t iSector, float X, float Y, float Z, GPUdi() void GPUParam::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { - const int32_t rowType = tpcGeometry.GetROC(iRow); - z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); + const int32_t rowType = GPUTPCGeometry::GetROC(iRow); + z = CAMath::Abs(GPUTPCGeometry::TPCLength() - CAMath::Abs(z)); const float s2 = CAMath::Min(sinPhi * sinPhi, 0.95f * 0.95f); const float sec2 = 1.f / (1.f - s2); const float angleY2 = s2 * sec2; // dy/dx const float angleZ2 = DzDs * DzDs * sec2; // dz/dx - const float unscaledMult = time >= 0.f ? GetUnscaledMult(time) / tpcGeometry.Row2X(iRow) : 0.f; + const float unscaledMult = time >= 0.f ? 
GetUnscaledMult(time) / GPUTPCGeometry::Row2X(iRow) : 0.f; ErrY2 = GetClusterErrorSeeding(0, rowType, z, angleY2, unscaledMult); // Returns Err2 ErrZ2 = GetClusterErrorSeeding(1, rowType, z, angleZ2, unscaledMult); // Returns Err2 @@ -132,8 +133,8 @@ GPUdi() float GPUParam::GetClusterErrorSeeding(int32_t yz, int32_t type, float z GPUdi() void GPUParam::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { - int32_t rowType = tpcGeometry.GetROC(iRow); - z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); + int32_t rowType = GPUTPCGeometry::GetROC(iRow); + z = CAMath::Abs(GPUTPCGeometry::TPCLength() - CAMath::Abs(z)); const float s2 = CAMath::Min(sinPhi * sinPhi, 0.95f * 0.95f); float sec2 = 1.f / (1.f - s2); float angleY2 = s2 * sec2; // dy/dx @@ -172,14 +173,14 @@ GPUdi() float GPUParam::GetSystematicClusterErrorC122(float trackX, float trackY GPUdi() void GPUParam::GetClusterErrors2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const { - const int32_t rowType = tpcGeometry.GetROC(iRow); - z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); + const int32_t rowType = GPUTPCGeometry::GetROC(iRow); + z = CAMath::Abs(GPUTPCGeometry::TPCLength() - CAMath::Abs(z)); const float s2 = CAMath::Min(sinPhi * sinPhi, 0.95f * 0.95f); const float sec2 = 1.f / (1.f - s2); const float angleY2 = s2 * sec2; // dy/dx const float angleZ2 = DzDs * DzDs * sec2; // dz/dx - const float unscaledMult = time >= 0.f ? GetUnscaledMult(time) / tpcGeometry.Row2X(iRow) : 0.f; + const float unscaledMult = time >= 0.f ? GetUnscaledMult(time) / GPUTPCGeometry::Row2X(iRow) : 0.f; const float scaledInvAvgCharge = avgInvCharge * rec.tpc.clusterErrorChargeScaler > 0.f ? avgInvCharge * rec.tpc.clusterErrorChargeScaler : 1.f; const float scaledInvCharge = invCharge * rec.tpc.clusterErrorChargeScaler > 0.f ? 
invCharge * rec.tpc.clusterErrorChargeScaler : 1.f; @@ -218,7 +219,7 @@ GPUdi() float GPUParam::GetUnscaledMult(float time) const GPUdi() bool GPUParam::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const { - return CAMath::Abs(uncorrectedY) > (tpcGeometry.NPads(iRow) - 1) * 0.5f * tpcGeometry.PadWidth(iRow) + rec.tpc.rejectEdgeClustersMargin + trackSigmaY * rec.tpc.rejectEdgeClustersSigmaMargin; + return CAMath::Abs(uncorrectedY) > (GPUTPCGeometry::NPads(iRow) - 1) * 0.5f * GPUTPCGeometry::PadWidth(iRow) + rec.tpc.rejectEdgeClustersMargin + trackSigmaY * rec.tpc.rejectEdgeClustersSigmaMargin; } } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index bc760f6188caa..e12ca7ec601ad 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -24,6 +24,7 @@ #include "GPUTPCClusterData.h" #include "GPUO2DataTypes.h" #include "GPUDataTypes.h" +#include "GPUTPCGeometry.h" #include "AliHLTTPCRawCluster.h" #include "GPUParam.h" #include "GPULogging.h" @@ -275,10 +276,10 @@ struct zsEncoderRow : public zsEncoder { inline bool zsEncoderRow::sort(const o2::tpc::Digit a, const o2::tpc::Digit b) { - int32_t endpointa = param->tpcGeometry.GetRegion(a.getRow()); - int32_t endpointb = param->tpcGeometry.GetRegion(b.getRow()); - endpointa = 2 * endpointa + (a.getRow() >= param->tpcGeometry.GetRegionStart(endpointa) + param->tpcGeometry.GetRegionRows(endpointa) / 2); - endpointb = 2 * endpointb + (b.getRow() >= param->tpcGeometry.GetRegionStart(endpointb) + param->tpcGeometry.GetRegionRows(endpointb) / 2); + int32_t endpointa = GPUTPCGeometry::GetRegion(a.getRow()); + int32_t endpointb = GPUTPCGeometry::GetRegion(b.getRow()); + endpointa = 2 * endpointa + (a.getRow() >= GPUTPCGeometry::GetRegionStart(endpointa) + GPUTPCGeometry::GetRegionRows(endpointa) / 2); + endpointb = 2 * endpointb + (b.getRow() >= 
GPUTPCGeometry::GetRegionStart(endpointb) + GPUTPCGeometry::GetRegionRows(endpointb) / 2); if (endpointa != endpointb) { return endpointa <= endpointb; } @@ -295,11 +296,11 @@ bool zsEncoderRow::checkInput(std::vector& tmpBuffer, uint32_t k { seqLen = 1; if (lastRow != tmpBuffer[k].getRow()) { - endpointStart = param->tpcGeometry.GetRegionStart(curRegion); + endpointStart = GPUTPCGeometry::GetRegionStart(curRegion); endpoint = curRegion * 2; - if (tmpBuffer[k].getRow() >= endpointStart + param->tpcGeometry.GetRegionRows(curRegion) / 2) { + if (tmpBuffer[k].getRow() >= endpointStart + GPUTPCGeometry::GetRegionRows(curRegion) / 2) { endpoint++; - endpointStart += param->tpcGeometry.GetRegionRows(curRegion) / 2; + endpointStart += GPUTPCGeometry::GetRegionRows(curRegion) / 2; } } for (uint32_t l = k + 1; l < tmpBuffer.size(); l++) { @@ -408,7 +409,7 @@ void zsEncoderRow::decodePage(std::vector& outputBuffer, const z if ((uint32_t)region != decEndpoint / 2) { throw std::runtime_error("CRU ID / endpoint mismatch"); } - int32_t nRowsRegion = param->tpcGeometry.GetRegionRows(region); + int32_t nRowsRegion = GPUTPCGeometry::GetRegionRows(region); int32_t timeBin = (decHDR->timeOffset + (uint64_t)(o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) - firstOrbit) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN; for (int32_t l = 0; l < decHDR->nTimeBinSpan; l++) { @@ -420,7 +421,7 @@ void zsEncoderRow::decodePage(std::vector& outputBuffer, const z if (tbHdr->rowMask != 0 && ((upperRows) ^ ((decEndpoint & 1) != 0))) { throw std::runtime_error("invalid endpoint"); } - const int32_t rowOffset = param->tpcGeometry.GetRegionStart(region) + (upperRows ? (nRowsRegion / 2) : 0); + const int32_t rowOffset = GPUTPCGeometry::GetRegionStart(region) + (upperRows ? (nRowsRegion / 2) : 0); const int32_t nRows = upperRows ? (nRowsRegion - nRowsRegion / 2) : (nRowsRegion / 2); const int32_t nRowsUsed = __builtin_popcount((uint32_t)(tbHdr->rowMask & 0x7FFF)); decPagePtr += nRowsUsed ? 
(2 * nRowsUsed) : 2; @@ -513,7 +514,7 @@ void zsEncoderLinkBased::createBitmask(std::vector& tmpBuffer, u uint32_t l; for (l = k; l < tmpBuffer.size(); l++) { const auto& a = tmpBuffer[l]; - int32_t cruinsector = param->tpcGeometry.GetRegion(a.getRow()); + int32_t cruinsector = GPUTPCGeometry::GetRegion(a.getRow()); o2::tpc::GlobalPadNumber pad = mapper.globalPadNumber(o2::tpc::PadPos(a.getRow(), a.getPad())); o2::tpc::FECInfo fec = mapper.fecInfo(pad); o2::tpc::CRU cru = cruinsector; @@ -535,8 +536,8 @@ void zsEncoderLinkBased::createBitmask(std::vector& tmpBuffer, u bool zsEncoderLinkBased::sort(const o2::tpc::Digit a, const o2::tpc::Digit b) { // Fixme: this is blasphemy... one shoult precompute all values and sort an index array - int32_t cruinsectora = param->tpcGeometry.GetRegion(a.getRow()); - int32_t cruinsectorb = param->tpcGeometry.GetRegion(b.getRow()); + int32_t cruinsectora = GPUTPCGeometry::GetRegion(a.getRow()); + int32_t cruinsectorb = GPUTPCGeometry::GetRegion(b.getRow()); if (cruinsectora != cruinsectorb) { return cruinsectora < cruinsectorb; } @@ -1124,7 +1125,7 @@ inline uint32_t zsEncoderRun::run(std::vector* buffer, std::vectortpcGeometry.GetRegion(tmpBuffer[k].getRow()); + curRegion = GPUTPCGeometry::GetRegion(tmpBuffer[k].getRow()); } mustWriteSubPage = checkInput(tmpBuffer, k); } else { diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 4831be9b12bcc..445c03113cd39 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -18,7 +18,6 @@ #include "GPUParam.h" #include "GPUCommonAlgorithm.h" #include "GPUTPCCompressionTrackModel.h" -#include "GPUTPCGeometry.h" #include "GPUTPCClusterRejection.h" #include "GPUTPCCompressionKernels.inc" @@ -68,9 +67,10 @@ GPUdii() void GPUTPCCompressionKernels::Thread clusters[hit.sector][hit.row][hit.num - 
clusters->clusterOffset[hit.sector][hit.row]]; - float x = param.tpcGeometry.Row2X(hit.row); - float y = track.LinearPad2Y(hit.sector, orgCl.getPad(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)); - float z = param.tpcGeometry.LinearTime2Z(hit.sector, orgCl.getTime()); + constexpr GPUTPCGeometry geo; + float x = geo.Row2X(hit.row); + float y = track.LinearPad2Y(hit.sector, orgCl.getPad(), geo.PadWidth(hit.row), geo.NPads(hit.row)); + float z = geo.LinearTime2Z(hit.sector, orgCl.getTime()); if (nClustersStored) { if ((hit.sector < GPUCA_NSECTORS) ^ (lastSector < GPUCA_NSECTORS)) { break; @@ -78,7 +78,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread @@ -80,6 +81,7 @@ class TPCClusterDecompressionCore uint32_t slice = cmprClusters.sliceA[trackIndex]; uint32_t row = cmprClusters.rowA[trackIndex]; GPUTPCCompressionTrackModel track; + constexpr GPUTPCGeometry geo; uint32_t clusterIndex; for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) { uint32_t pad = 0, time = 0; @@ -105,23 +107,23 @@ class TPCClusterDecompressionCore if (changeLeg && track.Mirror()) { break; } - if (track.Propagate(param.tpcGeometry.Row2X(row), param.SectorParam[slice].Alpha)) { + if (track.Propagate(geo.Row2X(row), param.SectorParam[slice].Alpha)) { break; } uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1]; if (timeTmp & 800000) { timeTmp |= 0xFF000000; } - time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset))); - float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(slice, track.Y(), param.tpcGeometry.PadWidth(row), param.tpcGeometry.NPads(row)))); + time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, geo.LinearZ2Time(slice, track.Z() + zOffset))); + float tmpPad = CAMath::Max(0.f, CAMath::Min((float)geo.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(slice, track.Y(), 
geo.PadWidth(row), geo.NPads(row)))); pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad); time = time & 0xFFFFFF; pad = (uint16_t)pad; - if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) { + if (pad >= geo.NPads(row) * ClusterNative::scalePadPacked) { if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2 pad = 0; } else { - pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1; + pad = geo.NPads(row) * ClusterNative::scalePadPacked - 1; } } if (param.continuousMaxTimeBin > 0 && time >= maxTime) { @@ -136,11 +138,11 @@ class TPCClusterDecompressionCore pad = cmprClusters.padA[trackIndex]; } const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...); - float y = track.LinearPad2Y(slice, cluster.getPad(), param.tpcGeometry.PadWidth(row), param.tpcGeometry.NPads(row)); - float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime()); + float y = track.LinearPad2Y(slice, cluster.getPad(), geo.PadWidth(row), geo.NPads(row)); + float z = geo.LinearTime2Z(slice, cluster.getTime()); if (clusterIndex == 0) { zOffset = z; - track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SectorParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); + track.Init(geo.Row2X(row), y, z - zOffset, param.SectorParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); } if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) { break; diff --git a/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx b/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx index 40ce8c0ccda81..0b8e67fbe495e 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx +++ b/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx @@ -21,7 +21,7 @@ using namespace o2::gpu; TPCPadBitMap::TPCPadBitMap() { - GPUTPCGeometry geo{}; + constexpr GPUTPCGeometry geo; int32_t offset = 0; for (int32_t r = 0; r < GPUCA_ROW_COUNT; r++) { 
mPadOffsetPerRow[r] = offset; diff --git a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx index 41f0ad819d1b6..a20f3dc8aac1d 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx +++ b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx @@ -21,7 +21,7 @@ using namespace o2::gpu; TPCPadGainCalib::TPCPadGainCalib() { - GPUTPCGeometry geo{}; + constexpr GPUTPCGeometry geo{}; int32_t offset = 0; for (int32_t r = 0; r < GPUCA_ROW_COUNT; r++) { mPadOffsetPerRow[r] = offset; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 1c2a8e2b29a9c..f373d56ea0395 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -36,6 +36,7 @@ #include "GPUO2DataTypes.h" #include "TPCFastTransform.h" #include "GPUTPCConvertImpl.h" +#include "GPUTPCGeometry.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" @@ -601,13 +602,13 @@ GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nT } else if (iBorder == 1) { // transport to the right edge of the sector and rotate horizontally dAlpha = -dAlpha - CAMath::Pi() / 2; } else if (iBorder == 2) { // transport to the middle of the sector and rotate vertically to the border on the left - x0 = Param().tpcGeometry.Row2X(63); + x0 = GPUTPCGeometry::Row2X(63); } else if (iBorder == 3) { // transport to the middle of the sector and rotate vertically to the border on the right dAlpha = -dAlpha; - x0 = Param().tpcGeometry.Row2X(63); + x0 = GPUTPCGeometry::Row2X(63); } else if (iBorder == 4) { // transport to the middle of the sßector, w/o rotation dAlpha = 0; - x0 = Param().tpcGeometry.Row2X(63); + x0 = GPUTPCGeometry::Row2X(63); } const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi()); @@ -955,7 +956,7 @@ template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t 
nThreads, int32_t iBlock, int32_t iThread) { - float x0 = Param().tpcGeometry.Row2X(63); + float x0 = GPUTPCGeometry::Row2X(63); const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi()); for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { @@ -1295,7 +1296,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const int32_t sector = track->Sector(); for (int32_t attempt = 0; attempt < 2; attempt++) { GPUTPCGMBorderTrack b; - const float x0 = Param().tpcGeometry.Row2X(attempt == 0 ? 63 : cls.row); + const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row); if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) { b.SetTrackID(itr); b.SetNClusters(mOutputTracks[itr].NClusters()); @@ -1759,7 +1760,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : Param().tpcGeometry.Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index ae413aaa98648..1e4cc633eb4ca 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -353,7 +353,7 @@ void GPUTPCGMMerger::DebugStreamerUpdate(int32_t iTrk, int32_t ihit, float xx, f auto uncorrectedYZ = StreamerUncorrectedZY(cluster.sector, cluster.row, track, prop); float invCharge = 1.f / clusterNative.qMax; int32_t iRow = cluster.row; - float unscaledMult = (time >= 0.f ? Param().GetUnscaledMult(time) / Param().tpcGeometry.Row2X(iRow) : 0.f); + float unscaledMult = (time >= 0.f ? 
Param().GetUnscaledMult(time) / GPUTPCGeometry::Row2X(iRow) : 0.f); const float clAlpha = Param().Alpha(cluster.sector); uint32_t occupancyTotal = Param().occupancyTotal; o2::utils::DebugStreamer::instance()->getStreamer("debug_update_track", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_update_track").data() diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx index a439e6e653039..11b153c7f0d8b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx @@ -95,7 +95,7 @@ GPUd() void GPUTPCGMSectorTrack::SetParam2(const GPUTPCGMTrackParam& trk) GPUd() bool GPUTPCGMSectorTrack::FilterErrors(const GPUTPCGMMerger* merger, int32_t iSector, float maxSinPhi, float sinPhiMargin) { float lastX; - // float lastX = merger->Param().tpcGeometry.Row2X(mOrigTrack->Cluster(mOrigTrack->NClusters() - 1).GetRow()); // TODO: Why is this needed to be set below, Row2X should work, but looses some tracks + // float lastX = GPUTPCGeometry::Row2X(mOrigTrack->Cluster(mOrigTrack->NClusters() - 1).GetRow()); // TODO: Why is this needed to be set below, Row2X should work, but looses some tracks float y, z; int32_t row, index; const GPUTPCTracker& trk = merger->GetConstantMem()->tpcTrackers[iSector]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index be1d3803312fe..3bd2257d02e01 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -219,7 +219,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ bool dodEdx = param.par.dodEdx && param.dodEdxDownscaled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == 
clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { - dEdx.fillSubThreshold(lastRow - wayDirection, param); + dEdx.fillSubThreshold(lastRow - wayDirection); } } @@ -384,7 +384,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ pad /= clusterCount; relTime /= clusterCount; relTime = relTime - CAMath::Round(relTime); - dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], param, merger->GetConstantMem()->calibObjects, zz, pad, relTime); + dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } } else if (retVal >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track if (allowModification) { @@ -650,7 +650,7 @@ GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GP return dodEdx; } int32_t step = toRow > lastRow ? 1 : -1; - float xx = mX - Merger->Param().tpcGeometry.Row2X(lastRow); + float xx = mX - GPUTPCGeometry::Row2X(lastRow); for (int32_t iRow = lastRow + step; iRow != toRow; iRow += step) { if (CAMath::Abs(mP[2]) > maxSinPhi) { return dodEdx; @@ -658,15 +658,15 @@ GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GP if (CAMath::Abs(mP[0]) > CAMath::Abs(mX) * CAMath::Tan(kSectAngle / 2.f)) { return dodEdx; } - int32_t err = prop.PropagateToXAlpha(xx + Merger->Param().tpcGeometry.Row2X(iRow), prop.GetAlpha(), inFlyDirection); + int32_t err = prop.PropagateToXAlpha(xx + GPUTPCGeometry::Row2X(iRow), prop.GetAlpha(), inFlyDirection); if (err) { return dodEdx; } if (dodEdx && iRow + step == toRow) { float yUncorrected, zUncorrected; Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(sector, iRow, mP[0], mP[1], yUncorrected, zUncorrected); - uint32_t pad = CAMath::Float2UIntRn(Merger->Param().tpcGeometry.LinearY2Pad(sector, iRow, yUncorrected)); - if (pad >= 
Merger->Param().tpcGeometry.NPads(iRow) || (Merger->GetConstantMem()->calibObjects.dEdxCalibContainer && Merger->GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(sector, iRow, pad))) { + uint32_t pad = CAMath::Float2UIntRn(GPUTPCGeometry::LinearY2Pad(sector, iRow, yUncorrected)); + if (pad >= GPUTPCGeometry::NPads(iRow) || (Merger->GetConstantMem()->calibObjects.dEdxCalibContainer && Merger->GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(sector, iRow, pad))) { dodEdx = false; } } @@ -782,7 +782,7 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr } CADEBUG(printf("\tPropagated to y = %f: X %f Z %f SinPhi %f\n", mX, mP[0], mP[1], mP[2])); for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - float rowX = Merger->Param().tpcGeometry.Row2X(j); + float rowX = GPUTPCGeometry::Row2X(j); if (CAMath::Abs(rowX - (-mP[0] * lrFactor)) < 1.5f) { CADEBUG(printf("\t\tAttempt row %d (Y %f Z %f)\n", j, mX * lrFactor, mP[1])); AttachClusters(Merger, sector, j, iTrack, false, mX * lrFactor, mP[1]); @@ -823,18 +823,18 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr prop.Rotate180(); CADEBUG(printf("\tMirrored position: Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f\n", prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3])); iRow = toRow; - float dx = toX - Merger->Param().tpcGeometry.Row2X(toRow); + float dx = toX - GPUTPCGeometry::Row2X(toRow); if (up ^ (toX > mX)) { if (up) { - while (iRow < GPUCA_ROW_COUNT - 2 && Merger->Param().tpcGeometry.Row2X(iRow + 1) + dx <= mX) { + while (iRow < GPUCA_ROW_COUNT - 2 && GPUTPCGeometry::Row2X(iRow + 1) + dx <= mX) { iRow++; } } else { - while (iRow > 1 && Merger->Param().tpcGeometry.Row2X(iRow - 1) + dx >= mX) { + while (iRow > 1 && GPUTPCGeometry::Row2X(iRow - 1) + dx >= mX) { iRow--; } } - prop.PropagateToXAlpha(Merger->Param().tpcGeometry.Row2X(iRow) + dx, prop.GetAlpha(), inFlyDirection); + prop.PropagateToXAlpha(GPUTPCGeometry::Row2X(iRow) + dx, prop.GetAlpha(), 
inFlyDirection); AttachClustersPropagate(Merger, sector, iRow, toRow, iTrack, false, prop, inFlyDirection); } if (prop.PropagateToXAlpha(toX, prop.GetAlpha(), inFlyDirection)) { @@ -875,7 +875,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU return; } float dx = (toX - X) / count; - const float myRowX = Merger->Param().tpcGeometry.Row2X(iRow); + const float myRowX = GPUTPCGeometry::Row2X(iRow); // printf("AttachMirror\n"); // printf("X %f Y %f Z %f SinPhi %f toY %f -->\n", mX, mP[0], mP[1], mP[2], toY); // printf("X %f Y %f Z %f SinPhi %f, count %d dx %f (to: %f)\n", X, Y, Z, SinPhi, count, dx, X + count * dx); @@ -905,7 +905,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU int32_t step = paramX >= mX ? 1 : -1; int32_t found = 0; for (int32_t j = iRow; j >= 0 && j < GPUCA_ROW_COUNT && found < 3; j += step) { - float rowX = mX + Merger->Param().tpcGeometry.Row2X(j) - myRowX; + float rowX = mX + GPUTPCGeometry::Row2X(j) - myRowX; if (CAMath::Abs(rowX - paramX) < 1.5f) { // printf("Attempt row %d\n", j); AttachClusters(Merger, sector, j, iTrack, false, mP[2] > 0 ? 
X : -X, Z); @@ -930,8 +930,8 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, const auto& GPUrestrict() cls = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear; tzInner = cls[clusters[N - 1].num].getTime(); tzOuter = cls[clusters[0].num].getTime(); - xInner = merger->Param().tpcGeometry.Row2X(clusters[N - 1].row); - xOuter = merger->Param().tpcGeometry.Row2X(clusters[0].row); + xInner = GPUTPCGeometry::Row2X(clusters[N - 1].row); + xOuter = GPUTPCGeometry::Row2X(clusters[0].row); } ShiftZ(merger, clusters[0].sector, tzInner, tzOuter, xInner, xOuter); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx index a3e73c377ed44..9a4d2eebcb953 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx @@ -19,6 +19,7 @@ #include "GPUProcessor.h" #include "GPUO2DataTypes.h" #include "GPUTPCConvertImpl.h" +#include "GPUTPCGeometry.h" #include "GPUCommonMath.h" #ifndef GPUCA_GPUCODE_DEVICE @@ -39,7 +40,7 @@ void GPUTPCTrackingData::InitializeRows(const GPUParam& p) new (&mRows[i]) GPUTPCRow; } for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { - mRows[i].mX = p.tpcGeometry.Row2X(i); + mRows[i].mX = GPUTPCGeometry::Row2X(i); mRows[i].mMaxY = CAMath::Tan(p.par.dAlpha / 2.f) * mRows[i].mX; } } @@ -101,7 +102,7 @@ void* GPUTPCTrackingData::SetPointersRows(void* mem) GPUd() void GPUTPCTrackingData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) { maxY = row->mMaxY * 2.f / GPUCA_MIN_BIN_SIZE + 1; - maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : mem->param.tpcGeometry.TPCLength()) + 50; + maxZ = (mem->param.continuousMaxTimeBin > 0 ? 
(mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : GPUTPCGeometry::TPCLength()) + 50; maxZ = maxZ / GPUCA_MIN_BIN_SIZE + 1; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 3aac31c87498c..5a7df0ba8b874 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -377,8 +377,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } while (false); (void)found; if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { - uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISector(), iRow, yUncorrected)); - if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISector(), iRow, pad)) { + uint32_t pad = CAMath::Float2UIntRn(GPUTPCGeometry::LinearY2Pad(tracker.ISector(), iRow, yUncorrected)); + if (pad < GPUTPCGeometry::NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISector(), iRow, pad)) { r.mNMissed--; rowHit = CALINK_DEAD_CHANNEL; } @@ -395,7 +395,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, const float z1 = row1.Grid().ZMin() + hh1.y * row1.HstepZ(); const float z2 = row2.Grid().ZMin() + hh2.y * row2.HstepZ(); float oldOffset = tParam.ZOffset(); - tParam.ShiftZ(z1, z2, tracker.Param().tpcGeometry.Row2X(r.mFirstRow), tracker.Param().tpcGeometry.Row2X(r.mLastRow), tracker.Param().bzCLight, tracker.Param().rec.tpc.defaultZOffsetOverR); + tParam.ShiftZ(z1, z2, GPUTPCGeometry::Row2X(r.mFirstRow), GPUTPCGeometry::Row2X(r.mLastRow), tracker.Param().bzCLight, tracker.Param().rec.tpc.defaultZOffsetOverR); r.mLastZ -= tParam.ZOffset() - oldOffset; CADEBUG(printf("Shifted z from %f to %f\n", oldOffset, 
tParam.ZOffset())); } diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index f876270a34358..622da856af805 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -13,9 +13,9 @@ /// \author Felix Weiglhofer #include "ClusterAccumulator.h" -#include "GPUTPCGeometry.h" #include "CfUtils.h" #include "GPUParam.h" +#include "GPUTPCGeometry.h" #include "DataFormatsTPC/ClusterNative.h" using namespace o2::gpu; @@ -81,17 +81,17 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, t bool isEdgeCluster; if (param.rec.tpc.cfEdgeTwoPads) { - isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge + isEdgeCluster = pad < 2 || pad >= GPUTPCGeometry::NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge if (isEdgeCluster) { bool leftEdge = (pad < 2); - if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { + if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (GPUTPCGeometry::NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge. } else if (leftEdge ? 
(pad < mPadMean) : (pad > mPadMean)) { mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak } } } else { - isEdgeCluster = pad == 0 || pad == param.tpcGeometry.NPads(pos.row()) - 1; + isEdgeCluster = pad == 0 || pad == GPUTPCGeometry::NPads(pos.row()) - 1; } cn.qTot = CAMath::Float2UIntRn(mQtot); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index 4a167b7d53890..1e76860331de6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -15,6 +15,7 @@ #include "GPUTPCCFCheckPadBaseline.h" #include "Array2D.h" #include "PackedCharge.h" +#include "GPUTPCGeometry.h" #include "clusterFinderDefs.h" #ifndef GPUCA_GPUCODE @@ -151,7 +152,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThread GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer) { - const GPUTPCGeometry& geo = clusterer.Param().tpcGeometry; + constexpr GPUTPCGeometry geo; int32_t padOffset = 0; for (Row r = 0; r < GPUCA_ROW_COUNT; r++) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index f1fd95d696f5d..6662b93eccb78 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -22,6 +22,7 @@ #include "GPUCommonAlgorithm.h" #include "TPCPadGainCalib.h" #include "TPCZSLinkMapping.h" +#include "GPUTPCGeometry.h" using namespace o2::gpu; using namespace o2::gpu::tpccf; @@ -57,8 +58,8 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared const size_t nDigits = clusterer.mPzsOffsets[iBlock].offset; if (iThread == 0) { const int32_t region = endpoint / 2; - s.nRowsRegion = clusterer.Param().tpcGeometry.GetRegionRows(region); - s.regionStartRow = 
clusterer.Param().tpcGeometry.GetRegionStart(region); + s.nRowsRegion = GPUTPCGeometry::GetRegionRows(region); + s.regionStartRow = GPUTPCGeometry::GetRegionStart(region); s.nThreadsPerRow = CAMath::Max(1u, nThreads / ((s.nRowsRegion + (endpoint & 1)) / 2)); s.rowStride = nThreads / s.nThreadsPerRow; s.rowOffsetCounter = 0; @@ -524,7 +525,7 @@ GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorTy { #ifdef GPUCA_TPC_GEOMETRY_O2 // Ported from tpc::Mapper (Not available on GPU...) - const GPUTPCGeometry& geo = clusterer.Param().tpcGeometry; + constexpr GPUTPCGeometry geo; const int32_t regionIter = cru % 2; const int32_t istreamm = ((rawFECChannel % 10) / 2); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 25cd2497fbf62..379ea27443fea 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -14,6 +14,7 @@ #include "GPUTPCNNClusterizerKernels.h" #include "GPUTPCCFClusterizer.h" +#include "GPUTPCGeometry.h" using namespace o2::gpu; using namespace o2::gpu::tpccf; @@ -102,9 +103,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread 62 ? global_shift : 0); } -GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift, const GPUTPCGeometry& geo) +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) { if (pad < 0 || row < 0) { // Faster short-circuit return true; } else if (row < 63) { - return (pad >= static_cast(geo.NPads(row))); + return (pad >= static_cast(GPUTPCGeometry::NPads(row))); } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. 
Charge will be set to -1 in order to signal boundary to the neural network return true; } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) { - return (pad >= static_cast(geo.NPads(row - global_shift))); + return (pad >= static_cast(GPUTPCGeometry::NPads(row - global_shift))); } else { return true; } @@ -152,9 +153,9 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n #endif for (int r = -clustererNN.nnClusterizerSizeInputRow; r <= clustererNN.nnClusterizerSizeInputRow; r++) { bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); - int pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r, clusterer.Param().tpcGeometry); + int pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); for (int p = -clustererNN.nnClusterizerSizeInputPad + pad_offset; p <= clustererNN.nnClusterizerSizeInputPad + pad_offset; p++) { - bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow, clusterer.Param().tpcGeometry); + bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow); for (int t = -clustererNN.nnClusterizerSizeInputTime; t <= clustererNN.nnClusterizerSizeInputTime; t++) { if (!is_boundary) { ChargePos tmp_pos(row + r, pad + p, time + t); @@ -183,11 +184,11 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n if (dtype == 0) { clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(clusterer.mISector / 36.f); clustererNN.inputData16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); - clustererNN.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / clusterer.Param().tpcGeometry.NPads(row)); + clustererNN.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / 
GPUTPCGeometry::NPads(row)); } else { clustererNN.inputData32[write_idx] = clusterer.mISector / 36.f; clustererNN.inputData32[write_idx + 1] = row / 152.f; - clustererNN.inputData32[write_idx + 2] = static_cast(pad) / clusterer.Param().tpcGeometry.NPads(row); + clustererNN.inputData32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); } } } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index c7bd18115d61f..e6c1dc508d6e4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -67,9 +67,9 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate static GPUd() void publishClustersReg1(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); static GPUd() void publishClustersReg2(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); - static GPUd() int padOffset(int, int, const GPUTPCGeometry&); + static GPUd() int padOffset(int, int); static GPUd() int rowOffset(int, int); - static GPUd() bool isBoundary(int, int, int, const GPUTPCGeometry&); + static GPUd() bool isBoundary(int, int, int); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.cxx b/GPU/GPUTracking/dEdx/GPUdEdx.cxx index 2e67ddda7c99c..b7da0de4c0e29 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.cxx +++ b/GPU/GPUTracking/dEdx/GPUdEdx.cxx @@ -13,7 +13,6 @@ /// \author David Rohr #include "GPUdEdx.h" -#include "GPUTPCGeometry.h" #include "GPUdEdxInfo.h" #include "GPUCommonAlgorithm.h" #include "GPUParam.h" diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 6c0a96d3adb75..bcd75af468c28 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -16,12 +16,12 @@ #define GPUDEDX_H #include "GPUDef.h" -#include "GPUTPCGeometry.h" #include "GPUCommonMath.h" #include "GPUParam.h" #include "GPUdEdxInfo.h" #include 
"DataFormatsTPC/Defs.h" #include "CalibdEdxContainer.h" +#include "GPUTPCGeometry.h" #include "GPUDebugStreamer.h" namespace o2::gpu @@ -32,8 +32,8 @@ class GPUdEdx public: // The driver must call clear(), fill clusters row by row outside-in, then run computedEdx() to get the result GPUd() void clear(); - GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUParam& param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime); - GPUd() void fillSubThreshold(int32_t padRow, const GPUParam& param); + GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUCalibObjectsConst& calib, float z, float pad, float relTime); + GPUd() void fillSubThreshold(int32_t padRow); GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); private: @@ -92,7 +92,7 @@ GPUdi() void GPUdEdx::checkSubThresh(int32_t roc) mLastROC = roc; } -GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUParam& GPUrestrict() param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) +GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) { if (mCount >= MAX_NCL) { return; @@ -100,8 +100,9 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint // container containing all the dE/dx corrections auto calibContainer = calib.dEdxCalibContainer; + constexpr GPUTPCGeometry geo; - const int32_t roc = param.tpcGeometry.GetROC(padRow); + const int32_t roc = geo.GetROC(padRow); checkSubThresh(roc); float snp2 = trackSnp * trackSnp; if (snp2 > GPUCA_MAX_SIN_PHI_LOW) { @@ -119,7 +120,7 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint // getting the topology correction 
const uint32_t padPos = CAMath::Float2UIntRn(pad); // position of the pad is shifted half a pad ( pad=3 -> centre position of third pad) const float absRelPad = CAMath::Abs(pad - padPos); - const int32_t region = param.tpcGeometry.GetRegion(padRow); + const int32_t region = geo.GetRegion(padRow); z = CAMath::Abs(z); const float threshold = calibContainer->getZeroSupressionThreshold(sector, padRow, padPos); // TODO: Use the mean zero supresion threshold of all pads in the cluster? const bool useFullGainMap = calibContainer->isUsageOfFullGainMap(); @@ -161,8 +162,8 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint } GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamdEdx)) { - float padlx = param.tpcGeometry.Row2X(padRow); - float padly = param.tpcGeometry.LinearPad2Y(sector, padRow, padPos); + float padlx = geo.Row2X(padRow); + float padly = geo.LinearPad2Y(sector, padRow, padPos); o2::utils::DebugStreamer::instance()->getStreamer("debug_dedx", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_dedx").data() << "qTot=" << mChargeTot[mCount - 1] << "qMax=" << mChargeMax[mCount - 1] @@ -189,9 +190,9 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint }) } -GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestrict() param) +GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow) { - const int32_t roc = param.tpcGeometry.GetROC(padRow); + const int32_t roc = GPUTPCGeometry::GetROC(padRow); checkSubThresh(roc); mNSubThresh++; } diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index 6fd70354c9486..0a780732273db 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -150,13 +150,13 @@ void GPUDisplay::DrawGLScene_updateEventData() float4* ptr = 
&mGlobalPos[cid]; if (mParam->par.earlyTpcTransform) { const auto& cl = mIOPtrs->clusterData[iSector][i]; - mParam->Sector2Global(iSector, (mCfgH.clustersOnNominalRow ? mParam->tpcGeometry.Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(iSector, (mCfgH.clustersOnNominalRow ? GPUTPCGeometry::Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); } else { float x, y, z; const auto& cln = mIOPtrs->clustersNative->clusters[iSector][0][i]; GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSector, row, cln.getPad(), cln.getTime(), x, y, z); if (mCfgH.clustersOnNominalRow) { - x = mParam->tpcGeometry.Row2X(row); + x = GPUTPCGeometry::Row2X(row); } mParam->Sector2Global(iSector, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); } diff --git a/GPU/GPUTracking/qa/genEvents.cxx b/GPU/GPUTracking/qa/genEvents.cxx index 627cfc5f9909a..2e1bc1c5c64b2 100644 --- a/GPU/GPUTracking/qa/genEvents.cxx +++ b/GPU/GPUTracking/qa/genEvents.cxx @@ -222,7 +222,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) for (int32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) { // if( iRow>=50 ) break; //SG!!! 
- float xRow = param.tpcGeometry.Row2X(iRow); + float xRow = GPUTPCGeometry::Row2X(iRow); // transport to row int32_t err = 0; for (int32_t itry = 0; itry < 1; itry++) { From 9070674d026b2540656e18760bb3ed1f8517835b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 17:54:41 +0100 Subject: [PATCH 0088/1764] GPU: Improve memory usage debug printout --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 15 +++++++-------- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 8 ++++++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 2bd4c0e937c20..b715b08f52b32 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -811,11 +811,9 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str()); } if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) { - if (IsGPU()) { - printf("Allocated Device memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked ? 
ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked) : 0); - } - printf("Allocated Host memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked ? ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked) : 0); - printf("%16s", ""); + printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); + PrintMemoryOverview(); + printf("%76s", ""); PrintMemoryMax(); } mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back()); @@ -888,9 +886,10 @@ void GPUReconstruction::PrintMemoryMax() void GPUReconstruction::PrintMemoryOverview() { if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { - printf("Memory Allocation: Host %'zd / %'zu (Permanent %'zd), Device %'zd / %'zu, (Permanent %'zd) %zu chunks\n", - ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), - ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), mMemoryResources.size()); + printf("Memory Allocation: Host %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd), Device %'13zd / %'13zu, (Permanent %'13zd, Data %'13zd, Scratch %'13zd) %zu chunks\n", + ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, 
ptrDiff(mHostMemoryPermanent, mHostMemoryBase), ptrDiff(mHostMemoryPool, mHostMemoryPermanent), ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), + ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), ptrDiff(mDeviceMemoryPool, mDeviceMemoryPermanent), ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), + mMemoryResources.size()); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index f397fc51bd407..abf2d55c95db7 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -215,6 +215,10 @@ int32_t GPUReconstructionCPU::RunChains() mStatNEvents++; mNEventsProcessed++; + if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + printf("Allocated memory when starting processing %34s", ""); + PrintMemoryOverview(); + } mTimerTotal.Start(); const std::clock_t cpuTimerStart = std::clock(); if (mProcessingSettings.doublePipeline) { @@ -235,6 +239,10 @@ int32_t GPUReconstructionCPU::RunChains() } mTimerTotal.Stop(); mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; + if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + printf("Allocated memory when ending processing %36s", ""); + PrintMemoryOverview(); + } mStatWallTime = (mTimerTotal.GetElapsedTime() * 1000000. 
/ mStatNEvents); std::string nEventReport; From 435b17ba0828727f3962032c4aeefd4e27891bbd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 18:02:01 +0100 Subject: [PATCH 0089/1764] GPU TPC: Add option to clear all non-external-output TPC memory at end of processing --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 3 +++ GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 2 files changed, 4 insertions(+) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index abf2d55c95db7..a4074282da30f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -236,6 +236,9 @@ int32_t GPUReconstructionCPU::RunChains() return retVal; } } + if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) { + ClearAllocatedMemory(); + } } mTimerTotal.Stop(); mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index b7f761c73ffc0..f7004d76c726c 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -326,6 +326,7 @@ AddOption(oclPlatformNum, int32_t, -1, "", 0, "Platform to use, in case the back AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from included source code instead of using included spirv code") AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") +AddOption(tpcFreeAllocatedMemoryAfterProcessing, bool, false, "", 0, "Clean all memory allocated by TPC when TPC processing done, only data written to external output resources will remain") AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) 
AddSubConfig(GPUSettingsProcessingParam, param) From 1c4d839ae9281bc727ba91d12886b12cf1dfc4af Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 15:32:52 +0100 Subject: [PATCH 0090/1764] GPU: Remove bogus placement new leading to memory leak --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 -- GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx | 1 - GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index b715b08f52b32..35e44d99d5c0c 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -86,8 +86,6 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos mMaster = cfg.master; cfg.master->mSlaves.emplace_back(this); } - new (&mProcessingSettings) GPUSettingsProcessing; - new (&mGRPSettings) GPUSettingsGRP; param().SetDefaults(&mGRPSettings); mMemoryScalers.reset(new GPUMemorySizeScalers); for (uint32_t i = 0; i < NSECTORS; i++) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index c47bd488d96ef..aa01d26446b56 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -34,7 +34,6 @@ using namespace o2::gpu; GPUReconstruction* GPUReconstruction::CreateInstance(DeviceType type, bool forceType, GPUReconstruction* master) { GPUSettingsDeviceBackend cfg; - new (&cfg) GPUSettingsDeviceBackend; cfg.deviceType = type; cfg.forceDeviceType = forceType; cfg.master = master; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index f7004d76c726c..40a7fc71cbb4d 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -607,7 +607,7 @@ EndConfig() // Derrived parameters used in GPUParam 
BeginHiddenConfig(GPUSettingsParam, param) -AddVariableRTC(dAlpha, float, 0.f) // angular size +AddVariableRTC(dAlpha, float, 0.f) // angular size AddVariableRTC(assumeConstantBz, int8_t, 0) // Assume a constant magnetic field AddVariableRTC(toyMCEventsFlag, int8_t, 0) // events were build with home-made event generator AddVariableRTC(continuousTracking, int8_t, 0) // Continuous tracking, estimate bz and errors for abs(z) = 125cm during seeding From 4060a20febe97646422c8e89b1abc1037aca0788 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Mon, 10 Mar 2025 17:01:04 +0530 Subject: [PATCH 0091/1764] explicitly add CCDB to some CCDB fatal error strings --- CCDB/include/CCDB/BasicCCDBManager.h | 2 +- Framework/CCDBSupport/src/CCDBHelpers.cxx | 6 +++--- Framework/Core/src/DataRefUtils.cxx | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CCDB/include/CCDB/BasicCCDBManager.h b/CCDB/include/CCDB/BasicCCDBManager.h index 678bedf24e551..8af1817718fa2 100644 --- a/CCDB/include/CCDB/BasicCCDBManager.h +++ b/CCDB/include/CCDB/BasicCCDBManager.h @@ -333,7 +333,7 @@ T* CCDBManagerInstance::getSpecificForRun(std::string const& path, int runNumber auto [start, stop] = getRunDuration(runNumber); if (start < 0 || stop < 0) { if (mFatalWhenNull) { - reportFatal(std::string("Failed to get run duration for run ") + std::to_string(runNumber)); + reportFatal(std::string("Failed to get run duration for run ") + std::to_string(runNumber) + std::string(" from CCDB")); } return nullptr; } diff --git a/Framework/CCDBSupport/src/CCDBHelpers.cxx b/Framework/CCDBSupport/src/CCDBHelpers.cxx index 29d446403e1c8..0db4cbb5ac71d 100644 --- a/Framework/CCDBSupport/src/CCDBHelpers.cxx +++ b/Framework/CCDBSupport/src/CCDBHelpers.cxx @@ -219,7 +219,7 @@ auto populateCacheWith(std::shared_ptr const& helper, } else if (meta.defaultValue.get() == 2) { timestampToUse = std::stoi(dtc.runNumber); } else { - LOGP(fatal, "Undefined run-dependent option {} for spec {}/{}/{}", 
meta.defaultValue.get(), concrete.origin.as(), concrete.description.as(), int(concrete.subSpec)); + LOGP(fatal, "Undefined ccdb-run-dependent option {} for spec {}/{}/{}", meta.defaultValue.get(), concrete.origin.as(), concrete.description.as(), int(concrete.subSpec)); } } else if (isPrefix(ccdbMetadataPrefix, meta.name)) { std::string key = meta.name.substr(ccdbMetadataPrefix.size()); @@ -252,7 +252,7 @@ auto populateCacheWith(std::shared_ptr const& helper, LOGP(detail, "Loading {} for timestamp {}", path, timestampToUse); api.loadFileToMemory(v, path, metadata, timestampToUse, &headers, etag, helper->createdNotAfter, helper->createdNotBefore); if ((headers.count("Error") != 0) || (etag.empty() && v.empty())) { - LOGP(fatal, "Unable to find object {}/{}", path, timestampToUse); + LOGP(fatal, "Unable to find CCDB object {}/{}", path, timestampToUse); // FIXME: I should send a dummy message. continue; } @@ -394,7 +394,7 @@ AlgorithmSpec CCDBHelpers::fetchFromCCDB() helper->lastCheckedTFCounterOrbReset = timingInfo.tfCounter; api.loadFileToMemory(v, path, metadata, timingInfo.creation, &headers, etag, helper->createdNotAfter, helper->createdNotBefore); if ((headers.count("Error") != 0) || (etag.empty() && v.empty())) { - LOGP(fatal, "Unable to find object {}/{}", path, timingInfo.creation); + LOGP(fatal, "Unable to find CCDB object {}/{}", path, timingInfo.creation); // FIXME: I should send a dummy message. 
return; } diff --git a/Framework/Core/src/DataRefUtils.cxx b/Framework/Core/src/DataRefUtils.cxx index f092429d9c5a0..69eb1dc7faba6 100644 --- a/Framework/Core/src/DataRefUtils.cxx +++ b/Framework/Core/src/DataRefUtils.cxx @@ -94,7 +94,7 @@ void* DataRefUtils::decodeCCDB(DataRef const& ref, std::type_info const& tinfo) headerSize = *reinterpret_cast(buff + dh->payloadSize - Offset); } if (headerSize < 0) { - LOGP(fatal, "Anomalous flattened header size {} extracted", headerSize); + LOGP(fatal, "Anomalous flattened header size {} extracted for CCDB object {}/{}", headerSize, dh->dataOrigin.as(), dh->dataDescription.as()); } TMemFile memFile("name", const_cast(ref.payload), dh->payloadSize - headerSize, "READ"); gErrorIgnoreLevel = previousErrorLevel; @@ -128,7 +128,7 @@ std::map DataRefUtils::extractCCDBHeaders(DataRef cons } if (headerSize < 0) { - LOGP(fatal, "Anomalous flattened header size {} extracted", headerSize); + LOGP(fatal, "Anomalous flattened header size {} extracted for CCDB object {}/{}", headerSize, dh->dataOrigin.as(), dh->dataDescription.as()); } buff += dh->payloadSize - headerSize; // jump to the start of flattened header From caeafb5126c157792dbde64ca9310a86f7cd65b2 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 17 Mar 2025 10:53:31 +0100 Subject: [PATCH 0092/1764] itsresponse: remove incompatible arg in CMake --- Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt b/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt index ad26d9e658e16..381e4f4b54c01 100644 --- a/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt +++ b/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt @@ -29,7 +29,6 @@ endif() add_custom_command(TARGET O2exe-alpide-response-generator POST_BUILD COMMAND ${CMAKE_BINARY_DIR}/stage/bin/o2-alpide-response-generator -i ${ITSRESPONSE_DIR}/response/AlpideResponseData/ 
-o ${CMAKE_CURRENT_BINARY_DIR}/ BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/AlpideResponseData.root - DEPENDS alpide-response-generator COMMENT "Generating AlpideResponseData.root" ) From 8c1e88b840a1910ff8a9138c6ed7bde87519a08c Mon Sep 17 00:00:00 2001 From: afurs Date: Sat, 15 Mar 2025 11:59:47 +0100 Subject: [PATCH 0093/1764] DataFormatsFIT: hotfix for LUT, excluded CCDB API from header --- .../Detectors/FIT/common/CMakeLists.txt | 1 + .../include/DataFormatsFIT/LookUpTable.h | 17 +++++------- .../Detectors/FIT/common/src/LookUpTable.cxx | 26 +++++++++++++++++++ 3 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 DataFormats/Detectors/FIT/common/src/LookUpTable.cxx diff --git a/DataFormats/Detectors/FIT/common/CMakeLists.txt b/DataFormats/Detectors/FIT/common/CMakeLists.txt index fc8d975a34023..61dbcabc7f087 100644 --- a/DataFormats/Detectors/FIT/common/CMakeLists.txt +++ b/DataFormats/Detectors/FIT/common/CMakeLists.txt @@ -13,6 +13,7 @@ o2_add_library(DataFormatsFIT SOURCES src/RawEventData.cxx src/Triggers.cxx src/RawDataMetric.cxx + src/LookUpTable.cxx PUBLIC_LINK_LIBRARIES O2::CommonDataFormat O2::DetectorsCommonDataFormats O2::CCDB) diff --git a/DataFormats/Detectors/FIT/common/include/DataFormatsFIT/LookUpTable.h b/DataFormats/Detectors/FIT/common/include/DataFormatsFIT/LookUpTable.h index 64ae3dc9653d0..aa4bb1fba8d41 100644 --- a/DataFormats/Detectors/FIT/common/include/DataFormatsFIT/LookUpTable.h +++ b/DataFormats/Detectors/FIT/common/include/DataFormatsFIT/LookUpTable.h @@ -15,8 +15,9 @@ // Look Up Table FIT ////////////////////////////////////////////// -#include "CCDB/BasicCCDBManager.h" #include "DetectorsCommonDataFormats/DetID.h" +#include "CommonUtils/NameConf.h" +#include "Framework/Logger.h" #define BOOST_BIND_GLOBAL_PLACEHOLDERS #include #include @@ -158,8 +159,7 @@ enum class EModuleType : int { kUnknown, kTCM }; template , - typename MapEntryPM2ChannelID = std::unordered_map, - typename = typename std::enable_if_t::value>> + 
typename MapEntryPM2ChannelID = std::unordered_map> class LookupTableBase { public: @@ -174,7 +174,7 @@ class LookupTableBase typedef EntryPM_t Topo_t; // temporary for common interface LookupTableBase() = default; - LookupTableBase(const Table_t& vecEntryFEE) { initFromTable(vecEntryFEE); } + LookupTableBase(const Table_t* vecEntryFEE) { initFromTable(vecEntryFEE); } LookupTableBase(const std::string& pathToFile) { initFromFile(pathToFile); } LookupTableBase(const std::string& urlCCDB, const std::string& pathToStorageInCCDB, long timestamp = -1) { initCCDB(urlCCDB, pathToStorageInCCDB, timestamp); } // Map of str module names -> enum types @@ -243,13 +243,7 @@ class LookupTableBase prepareEntriesFEE(filepath); prepareLUT(); } - void initCCDB(const std::string& urlCCDB, const std::string& pathToStorageInCCDB, long timestamp = -1) - { - auto& mgr = o2::ccdb::BasicCCDBManager::instance(); - mgr.setURL(urlCCDB); - mVecEntryFEE = *(mgr.getForTimeStamp(pathToStorageInCCDB, timestamp)); - prepareLUT(); - } + void initCCDB(const std::string& urlCCDB, const std::string& pathToStorageInCCDB, long timestamp = -1); void initFromTable(const Table_t* vecEntryFEE) { mVecEntryFEE = *vecEntryFEE; @@ -419,6 +413,7 @@ class LookupTableBase Table_t mVecEntryFEE; MapEntryCRU2ModuleType_t mMapEntryCRU2ModuleType; MapEntryPM2ChannelID_t mMapEntryPM2ChannelID; + typedef std::enable_if_t::value> CheckChannelIDtype; // should be integral }; // Singleton for LookUpTable, coomon for all three FIT detectors diff --git a/DataFormats/Detectors/FIT/common/src/LookUpTable.cxx b/DataFormats/Detectors/FIT/common/src/LookUpTable.cxx new file mode 100644 index 0000000000000..73c0b1bf1bb9e --- /dev/null +++ b/DataFormats/Detectors/FIT/common/src/LookUpTable.cxx @@ -0,0 +1,26 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "DataFormatsFIT/LookUpTable.h" +#include "CCDB/BasicCCDBManager.h" +#include +using namespace o2::fit; +template +void LookupTableBase::initCCDB(const std::string& urlCCDB, const std::string& pathToStorageInCCDB, long timestamp) +{ + + auto& mgr = o2::ccdb::BasicCCDBManager::instance(); + mgr.setURL(urlCCDB); + mVecEntryFEE = *(mgr.getForTimeStamp::Table_t>(pathToStorageInCCDB, timestamp)); + prepareLUT(); +} +template class o2::fit::LookupTableBase, + std::unordered_map>; From 940e2e25ac2830b683ce0d5630b4370596b7a886 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Tue, 18 Mar 2025 10:48:12 +0100 Subject: [PATCH 0094/1764] Add empty skeleton for the TRK digitization (#13959) --- Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt | 3 +- .../ALICE3/TRK/simulation/CMakeLists.txt | 8 +- .../include/TRKSimulation/DPLDigitizerParam.h | 69 +++ .../include/TRKSimulation/Digitizer.h | 128 +++++ .../TRK/simulation/src/DPLDigitizerParam.cxx | 23 + .../ALICE3/TRK/simulation/src/Digitizer.cxx | 467 ++++++++++++++++++ .../TRK/simulation/src/TRKSimulationLinkDef.h | 6 + .../ALICE3/TRK/workflow/CMakeLists.txt | 32 ++ .../include/TRKWorkflow/DigitReaderSpec.h | 87 ++++ .../include/TRKWorkflow/DigitWriterSpec.h | 26 + .../TRK/workflow/src/DigitReaderSpec.cxx | 139 ++++++ .../TRK/workflow/src/DigitWriterSpec.cxx | 110 +++++ Steer/DigitizerWorkflow/CMakeLists.txt | 5 +- .../src/SimpleDigitizerWorkflow.cxx | 13 + .../src/TRKDigitizerSpec.cxx | 303 ++++++++++++ .../DigitizerWorkflow/src/TRKDigitizerSpec.h | 24 + 16 files changed, 1439 insertions(+), 4 deletions(-) create mode 100644 
Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/DPLDigitizerParam.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Digitizer.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/DPLDigitizerParam.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/Digitizer.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitReaderSpec.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitWriterSpec.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitReaderSpec.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitWriterSpec.cxx create mode 100644 Steer/DigitizerWorkflow/src/TRKDigitizerSpec.cxx create mode 100644 Steer/DigitizerWorkflow/src/TRKDigitizerSpec.h diff --git a/Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt index 83838a01d13f1..645e3149e4ab7 100644 --- a/Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt @@ -10,4 +10,5 @@ # or submit itself to any jurisdiction. 
add_subdirectory(base) -add_subdirectory(simulation) \ No newline at end of file +add_subdirectory(simulation) +add_subdirectory(workflow) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt index c21b7b9aebbf6..856fd310fe5a2 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt @@ -12,16 +12,20 @@ o2_add_library(TRKSimulation SOURCES src/TRKLayer.cxx src/Detector.cxx + src/Digitizer.cxx src/TRKServices.cxx + src/DPLDigitizerParam.cxx src/TRKPetalCase.cxx src/TRKPetalLayer.cxx src/TRKPetalDisk.cxx PUBLIC_LINK_LIBRARIES O2::TRKBase O2::FT3Simulation - O2::ITSMFTSimulation) + O2::ITSMFTSimulation + O2::SimulationDataFormat) o2_target_root_dictionary(TRKSimulation - HEADERS include/TRKSimulation/Detector.h + HEADERS include/TRKSimulation/Digitizer.h + include/TRKSimulation/Detector.h include/TRKSimulation/TRKLayer.h include/TRKSimulation/TRKServices.h include/TRKSimulation/TRKPetalCase.h diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/DPLDigitizerParam.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/DPLDigitizerParam.h new file mode 100644 index 0000000000000..59b3551ecbd32 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/DPLDigitizerParam.h @@ -0,0 +1,69 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef ALICEO2_TRKDPLDIGITIZERPARAM_H_ +#define ALICEO2_TRKDPLDIGITIZERPARAM_H_ + +#include "DetectorsCommonDataFormats/DetID.h" +#include "CommonUtils/ConfigurableParam.h" +#include "CommonUtils/ConfigurableParamHelper.h" +#include + +namespace o2 +{ +namespace trk +{ +template +struct DPLDigitizerParam : public o2::conf::ConfigurableParamHelper> { + static_assert(N == o2::detectors::DetID::TRK || N == o2::detectors::DetID::FT3, "only DetID::TRK or DetID::FT3 are allowed"); + + static constexpr std::string_view getParamName() + { + return N == o2::detectors::DetID::TRK ? ParamName[0] : ParamName[1]; + } + + bool continuous = true; ///< flag for continuous simulation + float noisePerPixel = DEFNoisePerPixel(); ///< ALPIDE Noise per channel + float strobeFlatTop = 7500.; ///< strobe shape flat top + float strobeMaxRiseTime = 1100.; ///< strobe max rise time + float strobeQRiseTime0 = 450.; ///< q @ which strobe rise time is 0 + + double timeOffset = 0.; ///< time offset (in seconds!) to calculate ROFrame from hit time + int chargeThreshold = 150; ///< charge threshold in Nelectrons + int minChargeToAccount = 15; ///< minimum charge contribution to account + int nSimSteps = 7; ///< number of steps in response simulation + float energyToNElectrons = 1. / 3.6e-9; // conversion of eloss to Nelectrons + + float Vbb = 0.0; ///< back bias absolute value for MFT (in Volt) + float IBVbb = 0.0; ///< back bias absolute value for ITS Inner Barrel (in Volt) + float OBVbb = 0.0; ///< back bias absolute value for ITS Outter Barrel (in Volt) + + std::string noiseFilePath{}; ///< optional noise masks file path. FIXME to be removed once switch to CCDBFetcher + + // boilerplate stuff + make principal key + O2ParamDef(DPLDigitizerParam, getParamName().data()); + + private: + static constexpr float DEFNoisePerPixel() + { + return N == o2::detectors::DetID::TRK ? 1e-8 : 1e-8; // ITS/MFT values here!! 
+ } + + static constexpr std::string_view ParamName[2] = {"TRKDigitizerParam", "FT3DigitizerParam"}; +}; + +template +DPLDigitizerParam DPLDigitizerParam::sInstance; + +} // namespace trk +} // namespace o2 + +#endif diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Digitizer.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Digitizer.h new file mode 100644 index 0000000000000..6863c5392cae3 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Digitizer.h @@ -0,0 +1,128 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file Digitizer.h +/// \brief Definition of the TRK digitizer +#ifndef ALICEO2_TRK_DIGITIZER_H +#define ALICEO2_TRK_DIGITIZER_H + +#include +#include +#include + +#include "Rtypes.h" // for Digitizer::Class +#include "TObject.h" // for TObject + +#include "ITSMFTSimulation/ChipDigitsContainer.h" +// #include "ITSMFTSimulation/AlpideSimResponse.h" +#include "ITSMFTSimulation/DigiParams.h" +#include "ITSMFTSimulation/Hit.h" +#include "TRKBase/GeometryTGeo.h" +// #include "ITS3Base/SegmentationSuperAlpide.h" +#include "DataFormatsITSMFT/Digit.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "CommonDataFormat/InteractionRecord.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include "SimulationDataFormat/MCTruthContainer.h" +#endif + +namespace o2::trk +{ + +class Digitizer : public TObject +{ + using ExtraDig = std::vector; ///< container for extra contributions to PreDigits + + public: + void setDigits(std::vector* dig) { mDigits = dig; } + void setMCLabels(o2::dataformats::MCTruthContainer* mclb) { mMCLabels = mclb; } + void setROFRecords(std::vector* rec) { mROFRecords = rec; } + + o2::itsmft::DigiParams& getParams() { return (o2::itsmft::DigiParams&)mParams; } + const o2::itsmft::DigiParams& getParams() const { return mParams; } + + void init(); + + /// Steer conversion of hits to digits + void process(const std::vector* hits, int evID, int srcID); + void setEventTime(const o2::InteractionTimeRecord& irt); + double getEndTimeOfROFMax() const + { + ///< return the time corresponding to end of the last reserved ROFrame : mROFrameMax + return mParams.getROFrameLength() * (mROFrameMax + 1) + mParams.getTimeOffset(); + } + + void setContinuous(bool v) { mParams.setContinuous(v); } + bool isContinuous() const { return mParams.isContinuous(); } + void fillOutputContainer(uint32_t maxFrame = 0xffffffff); + + void setDigiParams(const o2::itsmft::DigiParams& par) { mParams = par; } + const o2::itsmft::DigiParams& getDigitParams() const { return mParams; } + 
+ // provide the common itsmft::GeometryTGeo to access matrices and segmentation + void setGeometry(const o2::trk::GeometryTGeo* gm) { mGeometry = gm; } + + uint32_t getEventROFrameMin() const { return mEventROFrameMin; } + uint32_t getEventROFrameMax() const { return mEventROFrameMax; } + void resetEventROFrames() + { + mEventROFrameMin = 0xffffffff; + mEventROFrameMax = 0; + } + + void setDeadChannelsMap(const o2::itsmft::NoiseMap* mp) { mDeadChanMap = mp; } + + private: + void processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID, int srcID); + void registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, + uint16_t row, uint16_t col, int nEle, o2::MCCompLabel& lbl); + + ExtraDig* getExtraDigBuffer(uint32_t roFrame) + { + if (mROFrameMin > roFrame) { + return nullptr; // nothing to do + } + int ind = roFrame - mROFrameMin; + while (ind >= int(mExtraBuff.size())) { + mExtraBuff.emplace_back(std::make_unique()); + } + return mExtraBuff[ind].get(); + } + + static constexpr float sec2ns = 1e9; + + o2::itsmft::DigiParams mParams; ///< digitization parameters + o2::InteractionTimeRecord mEventTime; ///< global event time and interaction record + o2::InteractionRecord mIRFirstSampledTF; ///< IR of the 1st sampled IR, noise-only ROFs will be inserted till this IR only + double mCollisionTimeWrtROF{}; + uint32_t mROFrameMin = 0; ///< lowest RO frame of current digits + uint32_t mROFrameMax = 0; ///< highest RO frame of current digits + uint32_t mNewROFrame = 0; ///< ROFrame corresponding to provided time + + uint32_t mEventROFrameMin = 0xffffffff; ///< lowest RO frame for processed events (w/o automatic noise ROFs) + uint32_t mEventROFrameMax = 0; ///< highest RO frame forfor processed events (w/o automatic noise ROFs) + + o2::itsmft::AlpideSimResponse* mAlpSimResp = nullptr; // simulated response + + const o2::trk::GeometryTGeo* mGeometry = nullptr; ///< TRK geometry + + std::vector mChips; ///< Array of chips digits 
containers + std::deque> mExtraBuff; ///< burrer (per roFrame) for extra digits + + std::vector* mDigits = nullptr; //! output digits + std::vector* mROFRecords = nullptr; //! output ROF records + o2::dataformats::MCTruthContainer* mMCLabels = nullptr; //! output labels + + const o2::itsmft::NoiseMap* mDeadChanMap = nullptr; + + ClassDef(Digitizer, 1); +}; +} // namespace o2::trk \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/DPLDigitizerParam.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/DPLDigitizerParam.cxx new file mode 100644 index 0000000000000..a13f2e58bd3a4 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/DPLDigitizerParam.cxx @@ -0,0 +1,23 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "TRKSimulation/DPLDigitizerParam.h" + +namespace o2 +{ +namespace trk +{ +// this makes sure that the constructor of the parameters is statically called +// so that these params are part of the parameter database +static auto& sDigitizerParamITS = o2::trk::DPLDigitizerParam::Instance(); +static auto& sDigitizerParamMFT = o2::trk::DPLDigitizerParam::Instance(); +} // namespace trk +} // namespace o2 diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/Digitizer.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Digitizer.cxx new file mode 100644 index 0000000000000..21e6e629ec418 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Digitizer.cxx @@ -0,0 +1,467 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file Digitizer.cxx + +#include "DataFormatsITSMFT/Digit.h" +// #include "ITSMFTBase/SegmentationAlpide.h" +#include "TRKSimulation/DPLDigitizerParam.h" +#include "TRKSimulation/Digitizer.h" +// #include "MathUtils/Cartesian.h" +// #include "SimulationDataFormat/MCTruthContainer.h" +// #include "DetectorsRaw/HBFUtils.h" + +// #include +// #include +// #include +// #include +#include // for LOG + +using o2::itsmft::Digit; +using o2::itsmft::Hit; +// using Segmentation = o2::itsmft::SegmentationAlpide; + +using namespace o2::trk; +// using namespace o2::base; + +//_______________________________________________________________________ +void Digitizer::init() +{ + // mNumberOfChips = mGeometry->getNumberOfChips(); + // mChips.resize(mNumberOfChips); + // for (int i = mNumberOfChips; i--;) { + // mChips[i].setChipIndex(i); + // if (mNoiseMap) { + // mChips[i].setNoiseMap(mNoiseMap); + // } + // if (mDeadChanMap) { + // mChips[i].disable(mDeadChanMap->isFullChipMasked(i)); + // mChips[i].setDeadChanMap(mDeadChanMap); + // } + // } + // initializing for both collection tables + /*for (int i = 0; i < 2; i++) { + mAlpSimResp[i].initData(i); + }*/ + + // importing the charge collection tables + // (initialized while building O2) + // auto file = TFile::Open(mResponseFile.data()); + // if (!file) { + // LOG(fatal) << "Cannot open response file " << mResponseFile; + // } + /*std::string response = "response"; + for (int i=0; i<2; i++) { + response.append(std::to_string(i)); + mAlpSimResp[i] = *(o2::itsmft::AlpideSimResponse*)file->Get(response.data()); + }*/ + // mAlpSimResp[0] = *(o2::itsmft::AlpideSimResponse*)file->Get("response0"); + // mAlpSimResp[1] = *(o2::itsmft::AlpideSimResponse*)file->Get("response1"); + + // importing the parameters from DPLDigitizerParam.h + auto& dOptTRK = DPLDigitizerParam::Instance(); + + LOGP(info, "TRK Digitizer is initalised."); +} + +// auto Digitizer::getChipResponse(int chipID) +// { +// if (mNumberOfChips < 10000) { // in MFT +// 
return mAlpSimRespMFT; +// } + +// if (chipID < 432) { // in ITS Inner Barrel +// return mAlpSimRespIB; +// } else { // in ITS Outter Barrel +// return mAlpSimRespOB; +// } +// } + +//_______________________________________________________________________ +void Digitizer::process(const std::vector* hits, int evID, int srcID) +{ + // digitize single event, the time must have been set beforehand + + // LOG(info) << "Digitizing " << mGeometry->getName() << " hits of entry " << evID << " from source " + // << srcID << " at time " << mEventTime << " ROFrame= " << mNewROFrame << ")" + // << " cont.mode: " << isContinuous() + // << " Min/Max ROFrames " << mROFrameMin << "/" << mROFrameMax; + + // // is there something to flush ? + // if (mNewROFrame > mROFrameMin) { + // fillOutputContainer(mNewROFrame - 1); // flush out all frame preceding the new one + // } + + // int nHits = hits->size(); + // std::vector hitIdx(nHits); + // std::iota(std::begin(hitIdx), std::end(hitIdx), 0); + // // sort hits to improve memory access + // std::sort(hitIdx.begin(), hitIdx.end(), + // [hits](auto lhs, auto rhs) { + // return (*hits)[lhs].GetDetectorID() < (*hits)[rhs].GetDetectorID(); + // }); + // for (int i : hitIdx) { + // processHit((*hits)[i], mROFrameMax, evID, srcID); + // } + // // in the triggered mode store digits after every MC event + // // TODO: in the real triggered mode this will not be needed, this is actually for the + // // single event processing only + // if (!mParams.isContinuous()) { + // fillOutputContainer(mROFrameMax); + // } +} + +//_______________________________________________________________________ +void Digitizer::setEventTime(const o2::InteractionTimeRecord& irt) +{ + // // assign event time in ns + // mEventTime = irt; + // if (!mParams.isContinuous()) { + // mROFrameMin = 0; // in triggered mode reset the frame counters + // mROFrameMax = 0; + // } + // // RO frame corresponding to provided time + // mCollisionTimeWrtROF = mEventTime.timeInBCNS; // in 
triggered mode the ROF starts at BC (is there a delay?) + // if (mParams.isContinuous()) { + // auto nbc = mEventTime.differenceInBC(mIRFirstSampledTF); + // if (mCollisionTimeWrtROF < 0 && nbc > 0) { + // nbc--; + // } + + // // we might get interactions to digitize from before + // // the first sampled IR + // if (nbc < 0) { + // mNewROFrame = 0; + // // this event is before the first RO + // mIsBeforeFirstRO = true; + // } else { + // mNewROFrame = nbc / mParams.getROFrameLengthInBC(); + // mIsBeforeFirstRO = false; + // } + // LOG(info) << " NewROFrame " << mNewROFrame << " nbc " << nbc; + + // // in continuous mode depends on starts of periodic readout frame + // mCollisionTimeWrtROF += (nbc % mParams.getROFrameLengthInBC()) * o2::constants::lhc::LHCBunchSpacingNS; + // } else { + // mNewROFrame = 0; + // } + + // if (mNewROFrame < mROFrameMin) { + // LOG(error) << "New ROFrame " << mNewROFrame << " (" << irt << ") precedes currently cashed " << mROFrameMin; + // throw std::runtime_error("deduced ROFrame precedes already processed one"); + // } + + // if (mParams.isContinuous() && mROFrameMax < mNewROFrame) { + // mROFrameMax = mNewROFrame - 1; // all frames up to this are finished + // } +} + +//_______________________________________________________________________ +void Digitizer::fillOutputContainer(uint32_t frameLast) +{ + // // fill output with digits from min.cached up to requested frame, generating the noise beforehand + // if (frameLast > mROFrameMax) { + // frameLast = mROFrameMax; + // } + // // make sure all buffers for extra digits are created up to the maxFrame + // getExtraDigBuffer(mROFrameMax); + + // LOG(info) << "Filling " << mGeometry->getName() << " digits output for RO frames " << mROFrameMin << ":" + // << frameLast; + + // o2::itsmft::ROFRecord rcROF; + + // // we have to write chips in RO increasing order, therefore have to loop over the frames here + // for (; mROFrameMin <= frameLast; mROFrameMin++) { + // 
rcROF.setROFrame(mROFrameMin); + // rcROF.setFirstEntry(mDigits->size()); // start of current ROF in digits + + // auto& extra = *(mExtraBuff.front().get()); + // for (auto& chip : mChips) { + // if (chip.isDisabled()) { + // continue; + // } + // chip.addNoise(mROFrameMin, mROFrameMin, &mParams); + // auto& buffer = chip.getPreDigits(); + // if (buffer.empty()) { + // continue; + // } + // auto itBeg = buffer.begin(); + // auto iter = itBeg; + // ULong64_t maxKey = chip.getOrderingKey(mROFrameMin + 1, 0, 0) - 1; // fetch digits with key below that + // for (; iter != buffer.end(); ++iter) { + // if (iter->first > maxKey) { + // break; // is the digit ROFrame from the key > the max requested frame + // } + // auto& preDig = iter->second; // preDigit + // if (preDig.charge >= mParams.getChargeThreshold()) { + // int digID = mDigits->size(); + // mDigits->emplace_back(chip.getChipIndex(), preDig.row, preDig.col, preDig.charge); + // mMCLabels->addElement(digID, preDig.labelRef.label); + // auto& nextRef = preDig.labelRef; // extra contributors are in extra array + // while (nextRef.next >= 0) { + // nextRef = extra[nextRef.next]; + // mMCLabels->addElement(digID, nextRef.label); + // } + // } + // } + // buffer.erase(itBeg, iter); + // } + // // finalize ROF record + // rcROF.setNEntries(mDigits->size() - rcROF.getFirstEntry()); // number of digits + // if (isContinuous()) { + // rcROF.getBCData().setFromLong(mIRFirstSampledTF.toLong() + mROFrameMin * mParams.getROFrameLengthInBC()); + // } else { + // rcROF.getBCData() = mEventTime; // RSTODO do we need to add trigger delay? 
+ // } + // if (mROFRecords) { + // mROFRecords->push_back(rcROF); + // } + // extra.clear(); // clear container for extra digits of the mROFrameMin ROFrame + // // and move it as a new slot in the end + // mExtraBuff.emplace_back(mExtraBuff.front().release()); + // mExtraBuff.pop_front(); + // } +} + +//_______________________________________________________________________ +void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID, int srcID) +{ + // // convert single hit to digits + // int chipID = hit.GetDetectorID(); + // auto& chip = mChips[chipID]; + // if (chip.isDisabled()) { + // LOG(debug) << "skip disabled chip " << chipID; + // return; + // } + // float timeInROF = hit.GetTime() * sec2ns; + // if (timeInROF > 20e3) { + // const int maxWarn = 10; + // static int warnNo = 0; + // if (warnNo < maxWarn) { + // LOG(warning) << "Ignoring hit with time_in_event = " << timeInROF << " ns" + // << ((++warnNo < maxWarn) ? "" : " (suppressing further warnings)"); + // } + // return; + // } + // if (isContinuous()) { + // timeInROF += mCollisionTimeWrtROF; + // } + // if (mIsBeforeFirstRO && timeInROF < 0) { + // // disregard this hit because it comes from an event before readout starts and it does not effect this RO + // return; + // } + + // // calculate RO Frame for this hit + // if (timeInROF < 0) { + // timeInROF = 0.; + // } + // float tTot = mParams.getSignalShape().getMaxDuration(); + // // frame of the hit signal start wrt event ROFrame + // int roFrameRel = int(timeInROF * mParams.getROFrameLengthInv()); + // // frame of the hit signal end wrt event ROFrame: in the triggered mode we read just 1 frame + // uint32_t roFrameRelMax = mParams.isContinuous() ? 
(timeInROF + tTot) * mParams.getROFrameLengthInv() : roFrameRel; + // int nFrames = roFrameRelMax + 1 - roFrameRel; + // uint32_t roFrameMax = mNewROFrame + roFrameRelMax; + // if (roFrameMax > maxFr) { + // maxFr = roFrameMax; // if signal extends beyond current maxFrame, increase the latter + // } + + // // here we start stepping in the depth of the sensor to generate charge diffusion + // float nStepsInv = mParams.getNSimStepsInv(); + // int nSteps = mParams.getNSimSteps(); + // const auto& matrix = mGeometry->getMatrixL2G(hit.GetDetectorID()); + // math_utils::Vector3D xyzLocS(matrix ^ (hit.GetPosStart())); // start position in sensor frame + // math_utils::Vector3D xyzLocE(matrix ^ (hit.GetPos())); // end position in sensor frame + + // math_utils::Vector3D step(xyzLocE); + // step -= xyzLocS; + // step *= nStepsInv; // position increment at each step + // // the electrons will injected in the middle of each step + // math_utils::Vector3D stepH(step * 0.5); + // xyzLocS += stepH; + // xyzLocE -= stepH; + + // int rowS = -1, colS = -1, rowE = -1, colE = -1, nSkip = 0; + // // get entrance pixel row and col + // while (!Segmentation::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? + // if (++nSkip >= nSteps) { + // return; // did not enter to sensitive matrix + // } + // xyzLocS += step; + // } + // // get exit pixel row and col + // while (!Segmentation::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? 
+ // if (++nSkip >= nSteps) { + // return; // did not enter to sensitive matrix + // } + // xyzLocE -= step; + // } + // // estimate the limiting min/max row and col where the non-0 response is possible + // if (rowS > rowE) { + // std::swap(rowS, rowE); + // } + // if (colS > colE) { + // std::swap(colS, colE); + // } + // rowS -= AlpideRespSimMat::NPix / 2; + // rowE += AlpideRespSimMat::NPix / 2; + // if (rowS < 0) { + // rowS = 0; + // } + // if (rowE >= Segmentation::NRows) { + // rowE = Segmentation::NRows - 1; + // } + // colS -= AlpideRespSimMat::NPix / 2; + // colE += AlpideRespSimMat::NPix / 2; + // if (colS < 0) { + // colS = 0; + // } + // if (colE >= Segmentation::NCols) { + // colE = Segmentation::NCols - 1; + // } + // int rowSpan = rowE - rowS + 1, colSpan = colE - colS + 1; // size of plaquet where some response is expected + + // float respMatrix[rowSpan][colSpan]; // response accumulated here + // std::fill(&respMatrix[0][0], &respMatrix[0][0] + rowSpan * colSpan, 0.f); + + // float nElectrons = hit.GetEnergyLoss() * mParams.getEnergyToNElectrons(); // total number of deposited electrons + // nElectrons *= nStepsInv; // N electrons injected per step + // if (nSkip) { + // nSteps -= nSkip; + // } + // // + // int rowPrev = -1, colPrev = -1, row, col; + // float cRowPix = 0.f, cColPix = 0.f; // local coordinated of the current pixel center + + // const o2::itsmft::AlpideSimResponse* resp = getChipResponse(chipID); + + // // take into account that the AlpideSimResponse depth defintion has different min/max boundaries + // // although the max should coincide with the surface of the epitaxial layer, which in the chip + // // local coordinates has Y = +SensorLayerThickness/2 + + // xyzLocS.SetY(xyzLocS.Y() + resp->getDepthMax() - Segmentation::SensorLayerThickness / 2.); + + // // collect charge in every pixel which might be affected by the hit + // for (int iStep = nSteps; iStep--;) { + // // Get the pixel ID + // 
Segmentation::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); + // if (row != rowPrev || col != colPrev) { // update pixel and coordinates of its center + // if (!Segmentation::detectorToLocal(row, col, cRowPix, cColPix)) { + // continue; // should not happen + // } + // rowPrev = row; + // colPrev = col; + // } + // bool flipCol, flipRow; + // // note that response needs coordinates along column row (locX) (locZ) then depth (locY) + // auto rspmat = resp->getResponse(xyzLocS.X() - cRowPix, xyzLocS.Z() - cColPix, xyzLocS.Y(), flipRow, flipCol); + + // xyzLocS += step; + // if (!rspmat) { + // continue; + // } + + // for (int irow = AlpideRespSimMat::NPix; irow--;) { + // int rowDest = row + irow - AlpideRespSimMat::NPix / 2 - rowS; // destination row in the respMatrix + // if (rowDest < 0 || rowDest >= rowSpan) { + // continue; + // } + // for (int icol = AlpideRespSimMat::NPix; icol--;) { + // int colDest = col + icol - AlpideRespSimMat::NPix / 2 - colS; // destination column in the respMatrix + // if (colDest < 0 || colDest >= colSpan) { + // continue; + // } + // respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, flipRow, flipCol); + // } + // } + // } + + // // fire the pixels assuming Poisson(n_response_electrons) + // o2::MCCompLabel lbl(hit.GetTrackID(), evID, srcID, false); + // auto roFrameAbs = mNewROFrame + roFrameRel; + // for (int irow = rowSpan; irow--;) { + // uint16_t rowIS = irow + rowS; + // for (int icol = colSpan; icol--;) { + // float nEleResp = respMatrix[irow][icol]; + // if (!nEleResp) { + // continue; + // } + // int nEle = gRandom->Poisson(nElectrons * nEleResp); // total charge in given pixel + // // ignore charge which have no chance to fire the pixel + // if (nEle < mParams.getMinChargeToAccount()) { + // continue; + // } + // uint16_t colIS = icol + colS; + // if (mNoiseMap && mNoiseMap->isNoisy(chipID, rowIS, colIS)) { + // continue; + // } + // if (mDeadChanMap && mDeadChanMap->isNoisy(chipID, rowIS, colIS)) { + // 
continue; + // } + // // + // registerDigits(chip, roFrameAbs, timeInROF, nFrames, rowIS, colIS, nEle, lbl); + // } + // } +} + +//________________________________________________________________________________ +void Digitizer::registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, + uint16_t row, uint16_t col, int nEle, o2::MCCompLabel& lbl) +{ + // Register digits for given pixel, accounting for the possible signal contribution to + // multiple ROFrame. The signal starts at time tInROF wrt the start of provided roFrame + // In every ROFrame we check the collected signal during strobe + + // float tStrobe = mParams.getStrobeDelay() - tInROF; // strobe start wrt signal start + // for (int i = 0; i < nROF; i++) { + // uint32_t roFr = roFrame + i; + // int nEleROF = mParams.getSignalShape().getCollectedCharge(nEle, tStrobe, tStrobe + mParams.getStrobeLength()); + // tStrobe += mParams.getROFrameLength(); // for the next ROF + + // // discard too small contributions, they have no chance to produce a digit + // if (nEleROF < mParams.getMinChargeToAccount()) { + // continue; + // } + // if (roFr > mEventROFrameMax) { + // mEventROFrameMax = roFr; + // } + // if (roFr < mEventROFrameMin) { + // mEventROFrameMin = roFr; + // } + // auto key = chip.getOrderingKey(roFr, row, col); + // PreDigit* pd = chip.findDigit(key); + // if (!pd) { + // chip.addDigit(key, roFr, row, col, nEleROF, lbl); + // } else { // there is already a digit at this slot, account as PreDigitExtra contribution + // pd->charge += nEleROF; + // if (pd->labelRef.label == lbl) { // don't store the same label twice + // continue; + // } + // ExtraDig* extra = getExtraDigBuffer(roFr); + // int& nxt = pd->labelRef.next; + // bool skip = false; + // while (nxt >= 0) { + // if ((*extra)[nxt].label == lbl) { // don't store the same label twice + // skip = true; + // break; + // } + // nxt = (*extra)[nxt].next; + // } + // if (skip) { + // continue; + // } + // // new 
predigit will be added in the end of the chain + // nxt = extra->size(); + // extra->emplace_back(lbl); + // } + // } +} diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h index b82d8879e7dad..876810b5bef9d 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h @@ -22,5 +22,11 @@ #pragma link C++ class o2::trk::TRKServices + ; #pragma link C++ class o2::trk::Detector + ; #pragma link C++ class o2::base::DetImpl < o2::trk::Detector> + ; +#pragma link C++ class o2::trk::Digitizer + ; + +// #pragma link C++ class o2::itsmft::DPLDigitizerParam < o2::detectors::DetID::ITS> + ; +// #pragma link C++ class o2::itsmft::DPLDigitizerParam < o2::detectors::DetID::ITS> + ; +// #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::TRK>> + ; +// #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::FT3>> + ; #endif diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt new file mode 100644 index 0000000000000..c9f4099017717 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. +# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. 
+ +o2_add_library(TRKWorkflow + SOURCES src/DigitReaderSpec.cxx + src/DigitWriterSpec.cxx + # src/RecoWorkflow.cxx + # src/ClusterWriterWorkflow.cxx + # src/ClustererSpec.cxx + # src/ClusterWriterSpec.cxx + # src/TrackerSpec.cxx + # src/TrackWriterSpec.cxx + # src/TrackReaderSpec.cxx + # src/VertexReaderSpec.cxx + PUBLIC_LINK_LIBRARIES O2::Framework + O2::SimConfig + O2::DataFormatsITSMFT + O2::SimulationDataFormat + O2::DPLUtils) + +# o2_add_executable(reco-workflow +# SOURCES src/trk-reco-workflow.cxx +# COMPONENT_NAME alice3-trk +# PUBLIC_LINK_LIBRARIES O2::TRKWorkflow) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitReaderSpec.h b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitReaderSpec.h new file mode 100644 index 0000000000000..2a0acd792f4a9 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitReaderSpec.h @@ -0,0 +1,87 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef O2_TRK_DIGITREADER +#define O2_TRK_DIGITREADER + +#include "TFile.h" +#include "TTree.h" +#include "DataFormatsITSMFT/Digit.h" +#include "DataFormatsITSMFT/GBTCalibData.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "Framework/DataProcessorSpec.h" +#include "Framework/Task.h" +#include "Headers/DataHeader.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "DetectorsCommonDataFormats/DetID.h" + +using namespace o2::framework; + +namespace o2 +{ +namespace trk +{ + +class DigitReader : public Task +{ + public: + DigitReader() = delete; + DigitReader(o2::detectors::DetID id, bool useMC, bool useCalib); + ~DigitReader() override = default; + void init(InitContext& ic) final; + void run(ProcessingContext& pc) final; + + protected: + void connectTree(const std::string& filename); + + std::vector mDigits, *mDigitsPtr = &mDigits; + std::vector mCalib, *mCalibPtr = &mCalib; + std::vector mDigROFRec, *mDigROFRecPtr = &mDigROFRec; + std::vector mDigMC2ROFs, *mDigMC2ROFsPtr = &mDigMC2ROFs; + + o2::header::DataOrigin mOrigin = o2::header::gDataOriginInvalid; + + std::unique_ptr mFile; + std::unique_ptr mTree; + + bool mUseMC = true; // use MC truth + bool mUseCalib = true; // send calib data + + std::string mDetName = ""; + std::string mDetNameLC = ""; + std::string mFileName = ""; + std::string mDigTreeName = "o2sim"; + std::string mDigitBranchName = "Digit"; + std::string mDigROFBranchName = "DigitROF"; + std::string mCalibBranchName = "Calib"; + + std::string mDigtMCTruthBranchName = "DigitMCTruth"; + std::string mDigtMC2ROFBranchName = "DigitMC2ROF"; +}; + +class TRKDigitReader : public DigitReader +{ + public: + TRKDigitReader(bool useMC = true, bool useCalib = false) + : DigitReader(o2::detectors::DetID::TRK, useMC, useCalib) + { + mOrigin = o2::header::gDataOriginTRK; + } +}; + +/// create a processor spec +/// read ALICE 3 TRK digit data from a ROOT file +framework::DataProcessorSpec getTRKDigitReaderSpec(bool useMC = true, bool useCalib = false,
std::string defname = "trkdigits.root"); + +} // namespace trk +} // namespace o2 + +#endif /* O2_TRK_DIGITREADER */ diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitWriterSpec.h b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitWriterSpec.h new file mode 100644 index 0000000000000..9c37d4318bb0f --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitWriterSpec.h @@ -0,0 +1,26 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef STEER_TRKDIGITWRITER_H_ +#define STEER_TRKDIGITWRITER_H_ + +#include "Framework/DataProcessorSpec.h" + +namespace o2 +{ +namespace trk +{ + +o2::framework::DataProcessorSpec getTRKDigitWriterSpec(bool mctruth = true, bool dec = false, bool calib = false); +} // namespace trk +} // end namespace o2 + +#endif /* STEER_TRKDIGITWRITER_H_ */ diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitReaderSpec.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitReaderSpec.cxx new file mode 100644 index 0000000000000..09bb1f12a48e4 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitReaderSpec.cxx @@ -0,0 +1,139 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved.
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include + +#include "TTree.h" + +#include "Framework/ControlService.h" +#include "Framework/ConfigParamRegistry.h" +#include "Framework/Logger.h" +#include "TRKWorkflow/DigitReaderSpec.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include "SimulationDataFormat/ConstMCTruthContainer.h" +#include "SimulationDataFormat/IOMCTruthContainerView.h" +#include + +using namespace o2::framework; +using namespace o2::itsmft; + +namespace o2 +{ +namespace trk +{ + +DigitReader::DigitReader(o2::detectors::DetID id, bool useMC, bool useCalib) +{ + assert(id == o2::detectors::DetID::TRK); + mDetNameLC = mDetName = id.getName(); + mDigTreeName = "o2sim"; + + mDigitBranchName = mDetName + mDigitBranchName; + mDigROFBranchName = mDetName + mDigROFBranchName; + mCalibBranchName = mDetName + mCalibBranchName; + + mDigtMCTruthBranchName = mDetName + mDigtMCTruthBranchName; + mDigtMC2ROFBranchName = mDetName + mDigtMC2ROFBranchName; + + mUseMC = useMC; + mUseCalib = useCalib; + std::transform(mDetNameLC.begin(), mDetNameLC.end(), mDetNameLC.begin(), ::tolower); +} + +void DigitReader::init(InitContext& ic) +{ + mFileName = ic.options().get((mDetNameLC + "-digit-infile").c_str()); + connectTree(mFileName); +} + +void DigitReader::run(ProcessingContext& pc) +{ + auto ent = mTree->GetReadEntry() + 1; + assert(ent < mTree->GetEntries()); // this should not happen + + o2::dataformats::IOMCTruthContainerView* plabels = nullptr; + if (mUseMC) { + mTree->SetBranchAddress(mDigtMCTruthBranchName.c_str(), &plabels); + } + mTree->GetEntry(ent); + LOG(info) << mDetName << "DigitReader pushes " << mDigROFRec.size() << " ROFRecords, " + << 
mDigits.size() << " digits at entry " << ent; + + // This is a very ugly way of providing DataDescription, which anyway does not need to contain detector name. + // To be fixed once the names-definition class is ready + pc.outputs().snapshot(Output{mOrigin, "DIGITSROF", 0}, mDigROFRec); + pc.outputs().snapshot(Output{mOrigin, "DIGITS", 0}, mDigits); + if (mUseCalib) { + pc.outputs().snapshot(Output{mOrigin, "GBTCALIB", 0}, mCalib); + } + + if (mUseMC) { + auto& sharedlabels = pc.outputs().make>(Output{mOrigin, "DIGITSMCTR", 0}); + plabels->copyandflatten(sharedlabels); + delete plabels; + pc.outputs().snapshot(Output{mOrigin, "DIGITSMC2ROF", 0}, mDigMC2ROFs); + } + + if (mTree->GetReadEntry() + 1 >= mTree->GetEntries()) { + pc.services().get().endOfStream(); + pc.services().get().readyToQuit(QuitRequest::Me); + } +} + +void DigitReader::connectTree(const std::string& filename) +{ + mTree.reset(nullptr); // in case it was already loaded + mFile.reset(TFile::Open(filename.c_str())); + assert(mFile && !mFile->IsZombie()); + mTree.reset((TTree*)mFile->Get(mDigTreeName.c_str())); + assert(mTree); + + mTree->SetBranchAddress(mDigROFBranchName.c_str(), &mDigROFRecPtr); + mTree->SetBranchAddress(mDigitBranchName.c_str(), &mDigitsPtr); + if (mUseCalib) { + if (!mTree->GetBranch(mCalibBranchName.c_str())) { + throw std::runtime_error("GBT calibration data requested but not found in the tree"); + } + mTree->SetBranchAddress(mCalibBranchName.c_str(), &mCalibPtr); + } + if (mUseMC) { + if (!mTree->GetBranch(mDigtMC2ROFBranchName.c_str()) || !mTree->GetBranch(mDigtMCTruthBranchName.c_str())) { + throw std::runtime_error("MC data requested but not found in the tree"); + } + mTree->SetBranchAddress(mDigtMC2ROFBranchName.c_str(), &mDigMC2ROFsPtr); + } + LOG(info) << "Loaded tree from " << filename << " with " << mTree->GetEntries() << " entries"; +} + +DataProcessorSpec getTRKDigitReaderSpec(bool useMC, bool useCalib, std::string defname) +{ + std::vector outputSpec; + 
outputSpec.emplace_back("TRK", "DIGITS", 0, Lifetime::Timeframe); + outputSpec.emplace_back("TRK", "DIGITSROF", 0, Lifetime::Timeframe); + if (useCalib) { + outputSpec.emplace_back("TRK", "GBTCALIB", 0, Lifetime::Timeframe); + } + if (useMC) { + outputSpec.emplace_back("TRK", "DIGITSMCTR", 0, Lifetime::Timeframe); + outputSpec.emplace_back("TRK", "DIGITSMC2ROF", 0, Lifetime::Timeframe); + } + + return DataProcessorSpec{ + "trk-digit-reader", + Inputs{}, + outputSpec, + AlgorithmSpec{adaptFromTask(useMC, useCalib)}, + Options{ + {"trk-digit-infile", VariantType::String, defname, {"Name of the input digit file"}}}}; +} + +} // namespace trk +} // namespace o2 diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitWriterSpec.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitWriterSpec.cxx new file mode 100644 index 0000000000000..2a743551adddb --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitWriterSpec.cxx @@ -0,0 +1,110 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// @brief Processor spec for a ROOT file writer for ALICE 3 TRK digits + +#include "TRKWorkflow/DigitWriterSpec.h" +#include "DPLUtils/MakeRootTreeWriterSpec.h" +#include "DataFormatsITSMFT/Digit.h" +#include "DataFormatsITSMFT/GBTCalibData.h" +#include "Headers/DataHeader.h" +#include "DetectorsCommonDataFormats/DetID.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "SimulationDataFormat/ConstMCTruthContainer.h" +#include "SimulationDataFormat/IOMCTruthContainerView.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include +#include +#include + +using namespace o2::framework; +using SubSpecificationType = o2::framework::DataAllocator::SubSpecificationType; + +namespace o2 +{ +namespace trk +{ + +template +using BranchDefinition = MakeRootTreeWriterSpec::BranchDefinition; +using MCCont = o2::dataformats::ConstMCTruthContainer; + +/// create the processor spec +/// describing a processor receiving digits of the detector given by detOrig/detId and writing them to file +DataProcessorSpec getDigitWriterSpec(bool mctruth, bool dec, bool calib, o2::header::DataOrigin detOrig, o2::detectors::DetID detId) +{ + std::string detStr = o2::detectors::DetID::getName(detId); + std::string detStrL = dec ?
"o2_" : ""; // for decoded digits prepend by o2 + detStrL += detStr; + std::transform(detStrL.begin(), detStrL.end(), detStrL.begin(), ::tolower); + auto logger = [](std::vector const& inDigits) { + LOG(info) << "RECEIVED DIGITS SIZE " << inDigits.size(); + }; + + // the callback to be set as hook for custom action when the writer is closed + auto finishWriting = [](TFile* outputfile, TTree* outputtree) { + const auto* brArr = outputtree->GetListOfBranches(); + int64_t nent = 0; + for (const auto* brc : *brArr) { + int64_t n = ((const TBranch*)brc)->GetEntries(); + if (nent && (nent != n)) { + LOG(error) << "Branches have different number of entries"; + } + nent = n; + } + outputtree->SetEntries(nent); + outputtree->Write("", TObject::kOverwrite); + outputfile->Close(); + }; + + // handler for labels + // This is necessary since we can't store the original label buffer in a ROOT entry -- as is -- if it exceeds a certain size. + // We therefore convert it to a special split class. + auto fillLabels = [](TBranch& branch, std::vector const& labelbuffer, DataRef const& /*ref*/) { + o2::dataformats::ConstMCTruthContainerView labels(labelbuffer); + LOG(info) << "WRITING " << labels.getNElements() << " LABELS "; + + o2::dataformats::IOMCTruthContainerView outputcontainer; + auto ptr = &outputcontainer; + auto br = framework::RootTreeWriter::remapBranch(branch, &ptr); + outputcontainer.adopt(labelbuffer); + br->Fill(); + br->ResetAddress(); + }; + + return MakeRootTreeWriterSpec((detStr + "DigitWriter" + (dec ? "_dec" : "")).c_str(), + (detStrL + "digits.root").c_str(), + MakeRootTreeWriterSpec::TreeAttributes{"o2sim", "Digits tree"}, + MakeRootTreeWriterSpec::CustomClose(finishWriting), + // in case of labels we first read them as std::vector and process them correctly in the fillLabels hook + BranchDefinition>{InputSpec{"digitsMCTR", detOrig, "DIGITSMCTR", 0}, + (detStr + "DigitMCTruth").c_str(), + (mctruth ? 
1 : 0), fillLabels}, + BranchDefinition>{InputSpec{"digitsMC2ROF", detOrig, "DIGITSMC2ROF", 0}, + (detStr + "DigitMC2ROF").c_str(), + (mctruth ? 1 : 0)}, + BranchDefinition>{InputSpec{"digits", detOrig, "DIGITS", 0}, + (detStr + "Digit").c_str(), + logger}, + BranchDefinition>{InputSpec{"calib", detOrig, "GBTCALIB", 0}, + (detStr + "Calib").c_str(), + (calib ? 1 : 0)}, + BranchDefinition>{InputSpec{"digitsROF", detOrig, "DIGITSROF", 0}, + (detStr + "DigitROF").c_str()})(); +} + +DataProcessorSpec getTRKDigitWriterSpec(bool mctruth, bool dec, bool calib) +{ + return getDigitWriterSpec(mctruth, dec, calib, o2::header::gDataOriginTRK, o2::detectors::DetID::TRK); +} + +} // end namespace trk +} // end namespace o2 diff --git a/Steer/DigitizerWorkflow/CMakeLists.txt b/Steer/DigitizerWorkflow/CMakeLists.txt index 1b839ba462b63..babc5fce4d864 100644 --- a/Steer/DigitizerWorkflow/CMakeLists.txt +++ b/Steer/DigitizerWorkflow/CMakeLists.txt @@ -29,6 +29,7 @@ o2_add_executable(digitizer-workflow src/ZDCDigitizerSpec.cxx src/TOFDigitizerSpec.cxx $<$:src/ITS3DigitizerSpec.cxx> + $<$:src/TRKDigitizerSpec.cxx> PUBLIC_LINK_LIBRARIES O2::Framework O2::Steer O2::CommonConstants @@ -67,7 +68,9 @@ o2_add_executable(digitizer-workflow O2::DetectorsRaw $<$:O2::ITS3Simulation> $<$:O2::ITS3Workflow> - $<$:O2::ITS3Align>) + $<$:O2::ITS3Align> + $<$:O2::TRKSimulation> + $<$:O2::TRKWorkflow>) o2_add_executable(mctruth-testworkflow diff --git a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx index a30294a240fb0..75141425f7c49 100644 --- a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx +++ b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx @@ -44,6 +44,10 @@ // for ITS3 #include "ITS3DigitizerSpec.h" #include "ITS3Workflow/DigitWriterSpec.h" + +// for alice 3 TRK +#include "TRKDigitizerSpec.h" +#include "TRKWorkflow/DigitWriterSpec.h" #endif // for TOF @@ -632,6 +636,15 @@ WorkflowSpec 
defineDataProcessing(ConfigContext const& configcontext) // // connect ITS digit writer specs.emplace_back(o2::its3::getITS3DigitWriterSpec(mctruth)); } + + // the ALICE 3 TRK part + if (isEnabled(o2::detectors::DetID::TRK)) { + detList.emplace_back(o2::detectors::DetID::TRK); + // connect the ALICE 3 TRK digitization + specs.emplace_back(o2::trk::getTRKDigitizerSpec(fanoutsize++, mctruth)); + // connect the ALICE 3 TRK digit writer + specs.emplace_back(o2::trk::getTRKDigitWriterSpec(mctruth)); + } #endif // the MFT part diff --git a/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.cxx b/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.cxx new file mode 100644 index 0000000000000..f35b53a58fae4 --- /dev/null +++ b/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.cxx @@ -0,0 +1,303 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "TRKDigitizerSpec.h" +#include "Framework/ControlService.h" +#include "Framework/ConfigParamRegistry.h" +#include "Framework/CCDBParamSpec.h" +#include "Framework/DataProcessorSpec.h" +#include "Framework/DataRefUtils.h" +#include "Framework/Lifetime.h" +#include "Framework/Task.h" +#include "Steer/HitProcessingManager.h" +#include "DataFormatsITSMFT/Digit.h" +#include "SimulationDataFormat/ConstMCTruthContainer.h" +#include "DetectorsBase/BaseDPLDigitizer.h" +#include "DetectorsCommonDataFormats/DetID.h" +#include "DetectorsCommonDataFormats/SimTraits.h" +#include "DataFormatsParameters/GRPObject.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "TRKSimulation/Digitizer.h" +#include "TRKSimulation/DPLDigitizerParam.h" +#include "ITSMFTBase/DPLAlpideParam.h" +#include "TRKBase/GeometryTGeo.h" +#include "TRKBase/TRKBaseParam.h" + +#include +#include + +#include + +using namespace o2::framework; +using SubSpecificationType = o2::framework::DataAllocator::SubSpecificationType; + +namespace +{ +std::vector makeOutChannels(o2::header::DataOrigin detOrig, bool mctruth) +{ + std::vector outputs; + outputs.emplace_back(detOrig, "DIGITS", 0, Lifetime::Timeframe); + outputs.emplace_back(detOrig, "DIGITSROF", 0, Lifetime::Timeframe); + if (mctruth) { + outputs.emplace_back(detOrig, "DIGITSMC2ROF", 0, Lifetime::Timeframe); + outputs.emplace_back(detOrig, "DIGITSMCTR", 0, Lifetime::Timeframe); + } + outputs.emplace_back(detOrig, "ROMode", 0, Lifetime::Timeframe); + return outputs; +} +} // namespace + +namespace o2::trk +{ +using namespace o2::base; +class TRKDPLDigitizerTask : BaseDPLDigitizer +{ + public: + using BaseDPLDigitizer::init; + + TRKDPLDigitizerTask(bool mctruth = true) : BaseDPLDigitizer(InitServices::FIELD | InitServices::GEOM), mWithMCTruth(mctruth) {} + + void initDigitizerTask(framework::InitContext& ic) override + { + mDisableQED = ic.options().get("disable-qed"); + } + + void run(framework::ProcessingContext& pc) + { + if (mFinished) { + 
return; + } + updateTimeDependentParams(pc); + + // read collision context from input + auto context = pc.inputs().get("collisioncontext"); + context->initSimChains(mID, mSimChains); + const bool withQED = context->isQEDProvided() && !mDisableQED; + auto& timesview = context->getEventRecords(withQED); + LOG(info) << "GOT " << timesview.size() << " COLLISION TIMES"; + LOG(info) << "SIMCHAINS " << mSimChains.size(); + + // if there is nothing to do ... return + if (timesview.empty()) { + return; + } + TStopwatch timer; + timer.Start(); + LOG(info) << " CALLING TRK DIGITIZATION "; + + // mDigitizer.setDigits(&mDigits); + mDigitizer.setROFRecords(&mROFRecords); + mDigitizer.setMCLabels(&mLabels); + + // digits are directly put into DPL owned resource + auto& digitsAccum = pc.outputs().make>(Output{mOrigin, "DIGITS", 0}); + + auto accumulate = [this, &digitsAccum]() { + // accumulate result of single event processing, called after processing every event supplied + // AND after the final flushing via digitizer::fillOutputContainer + if (mDigits.empty()) { + return; // no digits were flushed, nothing to accumulate + } + auto ndigAcc = digitsAccum.size(); + std::copy(mDigits.begin(), mDigits.end(), std::back_inserter(digitsAccum)); + + // fix ROFrecords references on ROF entries + auto nROFRecsOld = mROFRecordsAccum.size(); + + for (int i = 0; i < mROFRecords.size(); i++) { + auto& rof = mROFRecords[i]; + rof.setFirstEntry(ndigAcc + rof.getFirstEntry()); + rof.print(); + + if (mFixMC2ROF < mMC2ROFRecordsAccum.size()) { // fix ROFRecord entry in MC2ROF records + for (int m2rid = mFixMC2ROF; m2rid < mMC2ROFRecordsAccum.size(); m2rid++) { + // need to register the ROFRecord entry for MC event starting from this entry + auto& mc2rof = mMC2ROFRecordsAccum[m2rid]; + if (rof.getROFrame() == mc2rof.minROF) { + mFixMC2ROF++; + mc2rof.rofRecordID = nROFRecsOld + i; + mc2rof.print(); + } + } + } + } + + std::copy(mROFRecords.begin(), mROFRecords.end(),
std::back_inserter(mROFRecordsAccum)); + if (mWithMCTruth) { + mLabelsAccum.mergeAtBack(mLabels); + } + LOG(info) << "Added " << mDigits.size() << " digits "; + // clean containers from already accumulated stuff + mLabels.clear(); + mDigits.clear(); + mROFRecords.clear(); + }; // and accumulate lambda + + auto& eventParts = context->getEventParts(withQED); + // loop over all composite collisions given from context (aka loop over all the interaction records) + const int bcShift = mDigitizer.getParams().getROFrameBiasInBC(); + // loop over all composite collisions given from context (aka loop over all the interaction records) + for (size_t collID = 0; collID < timesview.size(); ++collID) { + auto irt = timesview[collID]; + if (irt.toLong() < bcShift) { // due to the ROF misalignment the collision would go to negative ROF ID, discard + continue; + } + irt -= bcShift; // account for the ROF start shift + + mDigitizer.setEventTime(irt); + mDigitizer.resetEventROFrames(); // to estimate min/max ROF for this collID + // for each collision, loop over the constituents event and source IDs + // (background signal merging is basically taking place here) + for (auto& part : eventParts[collID]) { + + // get the hits for this event and this source + mHits.clear(); + context->retrieveHits(mSimChains, o2::detectors::SimTraits::DETECTORBRANCHNAMES[mID][0].c_str(), part.sourceID, part.entryID, &mHits); + + if (!mHits.empty()) { + LOG(debug) << "For collision " << collID << " eventID " << part.entryID + << " found " << mHits.size() << " hits "; + mDigitizer.process(&mHits, part.entryID, part.sourceID); // call actual digitization procedure + } + } + mMC2ROFRecordsAccum.emplace_back(collID, -1, mDigitizer.getEventROFrameMin(), mDigitizer.getEventROFrameMax()); + accumulate(); + } + mDigitizer.fillOutputContainer(); + accumulate(); + + // here we have all digits and labels and we can send them to consumer (aka snapshot it onto output) + + pc.outputs().snapshot(Output{mOrigin, 
"DIGITSROF", 0}, mROFRecordsAccum); + if (mWithMCTruth) { + pc.outputs().snapshot(Output{mOrigin, "DIGITSMC2ROF", 0}, mMC2ROFRecordsAccum); + auto& sharedlabels = pc.outputs().make>(Output{mOrigin, "DIGITSMCTR", 0}); + mLabelsAccum.flatten_to(sharedlabels); + // free space of existing label containers + mLabels.clear_andfreememory(); + mLabelsAccum.clear_andfreememory(); + } + LOG(info) << mID.getName() << ": Sending ROMode= " << mROMode << " to GRPUpdater"; + pc.outputs().snapshot(Output{mOrigin, "ROMode", 0}, mROMode); + + timer.Stop(); + LOG(info) << "Digitization took " << timer.CpuTime() << "s"; + + // we should be only called once; tell DPL that this process is ready to exit + pc.services().get().readyToQuit(QuitRequest::Me); + + mFinished = true; + } + + void updateTimeDependentParams(ProcessingContext& pc) + { + static bool initOnce{false}; + if (!initOnce) { + initOnce = true; + auto& digipar = mDigitizer.getParams(); + + // configure digitizer + o2::trk::GeometryTGeo* geom = o2::trk::GeometryTGeo::Instance(); + geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::L2G)); // make sure L2G matrices are loaded + mDigitizer.setGeometry(geom); + + const auto& dopt = o2::trk::DPLDigitizerParam::Instance(); + pc.inputs().get*>("ITS_alppar"); + const auto& aopt = o2::itsmft::DPLAlpideParam::Instance(); + digipar.setContinuous(dopt.continuous); + digipar.setROFrameBiasInBC(aopt.roFrameBiasInBC); + if (dopt.continuous) { + auto frameNS = aopt.roFrameLengthInBC * o2::constants::lhc::LHCBunchSpacingNS; + digipar.setROFrameLengthInBC(aopt.roFrameLengthInBC); + digipar.setROFrameLength(frameNS); // RO frame in ns + digipar.setStrobeDelay(aopt.strobeDelay); // Strobe delay wrt beginning of the RO frame, in ns + digipar.setStrobeLength(aopt.strobeLengthCont > 0 ? 
aopt.strobeLengthCont : frameNS - aopt.strobeDelay); // Strobe length in ns + } else { + digipar.setROFrameLength(aopt.roFrameLengthTrig); // RO frame in ns + digipar.setStrobeDelay(aopt.strobeDelay); // Strobe delay wrt beginning of the RO frame, in ns + digipar.setStrobeLength(aopt.strobeLengthTrig); // Strobe length in ns + } + // parameters of signal time response: flat-top duration, max rise time and q @ which rise time is 0 + digipar.getSignalShape().setParameters(dopt.strobeFlatTop, dopt.strobeMaxRiseTime, dopt.strobeQRiseTime0); + digipar.setChargeThreshold(dopt.chargeThreshold); // charge threshold in electrons + digipar.setNoisePerPixel(dopt.noisePerPixel); // noise level + digipar.setTimeOffset(dopt.timeOffset); + digipar.setNSimSteps(dopt.nSimSteps); + + mROMode = digipar.isContinuous() ? o2::parameters::GRPObject::CONTINUOUS : o2::parameters::GRPObject::PRESENT; + LOG(info) << mID.getName() << " simulated in " + << ((mROMode == o2::parameters::GRPObject::CONTINUOUS) ? "CONTINUOUS" : "TRIGGERED") + << " RO mode"; + + // if (oTRKParams::Instance().useDeadChannelMap) { + // pc.inputs().get("TRK_dead"); // trigger final ccdb update + // } + + // init digitizer + mDigitizer.init(); + } + // Other time-dependent parameters can be added below + } + + void finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) + { + if (matcher == ConcreteDataMatcher(detectors::DetID::ITS, "ALPIDEPARAM", 0)) { + LOG(info) << mID.getName() << " Alpide param updated"; + const auto& par = o2::itsmft::DPLAlpideParam::Instance(); + par.printKeyValues(); + return; + } + // if (matcher == ConcreteDataMatcher(mOrigin, "DEADMAP", 0)) { + // LOG(info) << mID.getName() << " static dead map updated"; + // mDigitizer.setDeadChannelsMap((o2::itsmft::NoiseMap*)obj); + // return; + // } + } + + private: + bool mWithMCTruth{true}; + bool mFinished{false}; + bool mDisableQED{false}; + const o2::detectors::DetID mID{o2::detectors::DetID::TRK}; + const o2::header::DataOrigin 
mOrigin{o2::header::gDataOriginTRK}; + o2::trk::Digitizer mDigitizer{}; + std::vector mDigits{}; + std::vector mROFRecords{}; + std::vector mROFRecordsAccum{}; + std::vector mHits{}; + std::vector* mHitsP{&mHits}; + o2::dataformats::MCTruthContainer mLabels{}; + o2::dataformats::MCTruthContainer mLabelsAccum{}; + std::vector mMC2ROFRecordsAccum{}; + std::vector mSimChains{}; + + int mFixMC2ROF = 0; // 1st entry in mc2rofRecordsAccum to be fixed for ROFRecordID + o2::parameters::GRPObject::ROMode mROMode = o2::parameters::GRPObject::PRESENT; // readout mode +}; + +DataProcessorSpec getTRKDigitizerSpec(int channel, bool mctruth) +{ + std::string detStr = o2::detectors::DetID::getName(o2::detectors::DetID::TRK); + auto detOrig = o2::header::gDataOriginTRK; + std::vector inputs; + inputs.emplace_back("collisioncontext", "SIM", "COLLISIONCONTEXT", static_cast(channel), Lifetime::Timeframe); + inputs.emplace_back("ITS_alppar", "ITS", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); + // if (oTRKParams::Instance().useDeadChannelMap) { + // inputs.emplace_back("TRK_dead", "TRK", "DEADMAP", 0, Lifetime::Condition, ccdbParamSpec("TRK/Calib/DeadMap")); + // } + + return DataProcessorSpec{detStr + "Digitizer", + inputs, makeOutChannels(detOrig, mctruth), + AlgorithmSpec{adaptFromTask(mctruth)}, + Options{{"disable-qed", o2::framework::VariantType::Bool, false, {"disable QED handling"}}}}; +} + +} // namespace o2::trk diff --git a/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.h b/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.h new file mode 100644 index 0000000000000..5a1a59c3b9f5e --- /dev/null +++ b/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.h @@ -0,0 +1,24 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef STEER_DIGITIZERWORKFLOW_TRKDIGITIZER_H_ +#define STEER_DIGITIZERWORKFLOW_TRKDIGITIZER_H_ + +#include "Framework/DataProcessorSpec.h" + +namespace o2::trk +{ +o2::framework::DataProcessorSpec getTRKDigitizerSpec(int channel, bool mctruth = true); +} +// namespace o2::trk +// end namespace o2 + +#endif From 5fad059651ea1d743e8d338c7229b5b7d7af1f0a Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 14 Mar 2025 16:21:22 +0100 Subject: [PATCH 0095/1764] Fix round-robin reading in DigitizationContext::retrieveHits --- .../include/SimulationDataFormat/DigitizationContext.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/DataFormats/simulation/include/SimulationDataFormat/DigitizationContext.h b/DataFormats/simulation/include/SimulationDataFormat/DigitizationContext.h index 5e1f5f66b3f65..b718b2d5eb804 100644 --- a/DataFormats/simulation/include/SimulationDataFormat/DigitizationContext.h +++ b/DataFormats/simulation/include/SimulationDataFormat/DigitizationContext.h @@ -218,6 +218,10 @@ inline void DigitizationContext::retrieveHits(std::vector const& chains return; } br->SetAddress(&hits); + auto maxEntries = br->GetEntries(); + if (maxEntries) { + entryID %= maxEntries; + } br->GetEntry(entryID); } From 433615988f7250854c926fbe98d79f51c9377c7f Mon Sep 17 00:00:00 2001 From: apalasciano Date: Mon, 17 Feb 2025 11:51:21 +0100 Subject: [PATCH 0096/1764] Add DStar(2007)0 pdg code --- Common/Constants/include/CommonConstants/PhysicsConstants.h | 2 ++ Common/Constants/include/CommonConstants/make_pdg_header.py | 1 + 2 files changed, 3 insertions(+) diff --git 
a/Common/Constants/include/CommonConstants/PhysicsConstants.h b/Common/Constants/include/CommonConstants/PhysicsConstants.h index 6a8a791cffd22..c6fa3cddcdad5 100644 --- a/Common/Constants/include/CommonConstants/PhysicsConstants.h +++ b/Common/Constants/include/CommonConstants/PhysicsConstants.h @@ -52,6 +52,7 @@ enum Pdg { kDS1 = 10433, kDS2Star = 435, kDStar = 413, + kDStar0 = 423, kChiC1 = 20443, kJPsi = 443, kLambdaB0 = 5122, @@ -101,6 +102,7 @@ constexpr double MassDSStar = 2.1122; constexpr double MassDS1 = 2.53511; constexpr double MassDS2Star = 2.5691; constexpr double MassDStar = 2.01026; +constexpr double MassDStar0 = 2.00685; constexpr double MassChiC1 = 3.51067; constexpr double MassJPsi = 3.0969; constexpr double MassLambdaB0 = 5.6196; diff --git a/Common/Constants/include/CommonConstants/make_pdg_header.py b/Common/Constants/include/CommonConstants/make_pdg_header.py index 5c1e4602a9fbb..e4f92e6e8b62d 100755 --- a/Common/Constants/include/CommonConstants/make_pdg_header.py +++ b/Common/Constants/include/CommonConstants/make_pdg_header.py @@ -107,6 +107,7 @@ class Pdg(Enum): kDS1 = 10433 kDS2Star = 435 kDStar = 413 + kDStar0 = 423 kChiC1 = 20443 kJPsi = 443 kLambdaB0 = 5122 From ed0781538b248a920150cdf3afb5fe1d1ee41aa3 Mon Sep 17 00:00:00 2001 From: Andreas Molander Date: Wed, 4 Sep 2024 15:58:37 +0300 Subject: [PATCH 0097/1764] FIT: add RecPoint reader and writer workflows for FV0 and FDD The main purpose of the commit: - Adding workflows for reading RecPoints for FV0 and FDD, these are needed for MC aQC Secondary purposes that arose during development: - ROOT macro for comparing two RecPoint files - Workflows for writing RecPoints added for FT0, FV0 and FDD (i.e. 
simply standalone RecPoint writers, not related to reconstruction) - The two items above were needed to verify the new RecPoint reader workflows, namely by: reconstructing CTFs -> writing RecPoints to file -> Reading RecPoints from file -> Writing RecPoints to new file -> Comparing the two files - Minor cosmetics (e.g. reordering of includes) and utility functions (print and == operator functions) --- DataFormats/Detectors/FIT/FDD/CMakeLists.txt | 1 + .../FIT/FDD/include/DataFormatsFDD/RecPoint.h | 7 +- .../Detectors/FIT/FDD/src/RecPoint.cxx | 33 ++++++ .../FT0/include/DataFormatsFT0/RecPoints.h | 6 +- .../Detectors/FIT/FT0/src/RecPoints.cxx | 14 ++- .../FV0/include/DataFormatsFV0/RecPoints.h | 4 + .../Detectors/FIT/FV0/src/RecPoints.cxx | 19 ++- Detectors/FIT/FDD/workflow/CMakeLists.txt | 10 ++ .../include/FDDWorkflow/RecPointReaderSpec.h | 4 +- .../FDD/workflow/src/RecPointReaderSpec.cxx | 8 +- .../src/recpoints-reader-workflow.cxx | 57 +++++++++ .../src/recpoints-writer-workflow.cxx | 47 ++++++++ Detectors/FIT/FT0/workflow/CMakeLists.txt | 5 + .../src/recpoints-reader-workflow.cxx | 37 +++--- .../src/recpoints-writer-workflow.cxx | 47 ++++++++ Detectors/FIT/FV0/workflow/CMakeLists.txt | 10 ++ .../src/recpoints-reader-workflow.cxx | 58 +++++++++ .../src/recpoints-writer-workflow.cxx | 47 ++++++++ Detectors/FIT/macros/CMakeLists.txt | 5 + Detectors/FIT/macros/compareRecPoints.C | 110 ++++++++++++++++++ 20 files changed, 493 insertions(+), 36 deletions(-) create mode 100644 DataFormats/Detectors/FIT/FDD/src/RecPoint.cxx create mode 100644 Detectors/FIT/FDD/workflow/src/recpoints-reader-workflow.cxx create mode 100644 Detectors/FIT/FDD/workflow/src/recpoints-writer-workflow.cxx create mode 100644 Detectors/FIT/FT0/workflow/src/recpoints-writer-workflow.cxx create mode 100644 Detectors/FIT/FV0/workflow/src/recpoints-reader-workflow.cxx create mode 100644 Detectors/FIT/FV0/workflow/src/recpoints-writer-workflow.cxx create mode 100644
Detectors/FIT/macros/compareRecPoints.C diff --git a/DataFormats/Detectors/FIT/FDD/CMakeLists.txt b/DataFormats/Detectors/FIT/FDD/CMakeLists.txt index 6cf2deb3f988e..140ba1165bff8 100644 --- a/DataFormats/Detectors/FIT/FDD/CMakeLists.txt +++ b/DataFormats/Detectors/FIT/FDD/CMakeLists.txt @@ -11,6 +11,7 @@ o2_add_library(DataFormatsFDD SOURCES src/RawEventData.cxx + src/RecPoint.cxx src/CTF.cxx src/LookUpTable.cxx PUBLIC_LINK_LIBRARIES O2::FDDBase diff --git a/DataFormats/Detectors/FIT/FDD/include/DataFormatsFDD/RecPoint.h b/DataFormats/Detectors/FIT/FDD/include/DataFormatsFDD/RecPoint.h index 6615dc322180b..f784d99145728 100644 --- a/DataFormats/Detectors/FIT/FDD/include/DataFormatsFDD/RecPoint.h +++ b/DataFormats/Detectors/FIT/FDD/include/DataFormatsFDD/RecPoint.h @@ -9,8 +9,9 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file RecPoint.h +/// \file RecPoint.h /// \brief Definition of the FDD RecPoint class + #ifndef ALICEO2_FDD_RECPOINT_H #define ALICEO2_FDD_RECPOINT_H @@ -42,6 +43,7 @@ struct ChannelDataFloat { } void print() const; + bool operator==(const ChannelDataFloat&) const = default; ClassDefNV(ChannelDataFloat, 1); }; @@ -80,6 +82,9 @@ class RecPoint int getFirstEntry() const { return mRef.getFirstEntry(); } int getEntriesInCurrentBC() const { return mRef.getEntries(); } + void print() const; + bool operator==(const RecPoint&) const = default; + private: o2::dataformats::RangeReference mRef; o2::InteractionRecord mIntRecord; diff --git a/DataFormats/Detectors/FIT/FDD/src/RecPoint.cxx b/DataFormats/Detectors/FIT/FDD/src/RecPoint.cxx new file mode 100644 index 0000000000000..854a09088a2f4 --- /dev/null +++ b/DataFormats/Detectors/FIT/FDD/src/RecPoint.cxx @@ -0,0 +1,33 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file RecPoint.cxx +/// \brief Implementation of the FDD RecPoint class +/// \author Andreas Molander andreas.molander@cern.ch + +#include "DataFormatsFDD/RecPoint.h" +#include "Framework/Logger.h" + +using namespace o2::fdd; + +void ChannelDataFloat::print() const +{ + LOG(info) << "ChannelDataFloat data:"; + LOG(info) << "Channel ID: " << mPMNumber << ", Time (ps): " << mTime << ", Charge (ADC): " << mChargeADC << ", QTC chain: " << adcId; +} + +void RecPoint::print() const +{ + LOG(info) << "RecPoint data:"; + LOG(info) << "Collision times: A: " << getCollisionTimeA() << ", C: " << getCollisionTimeC(); + LOG(info) << "Ref first: " << mRef.getFirstEntry() << ", Ref entries: " << mRef.getEntries(); + LOG(info) << "Triggers: " << mTriggers.print(); +} diff --git a/DataFormats/Detectors/FIT/FT0/include/DataFormatsFT0/RecPoints.h b/DataFormats/Detectors/FIT/FT0/include/DataFormatsFT0/RecPoints.h index 1178cc20a4da0..d688e076489b5 100644 --- a/DataFormats/Detectors/FIT/FT0/include/DataFormatsFT0/RecPoints.h +++ b/DataFormats/Detectors/FIT/FT0/include/DataFormatsFT0/RecPoints.h @@ -47,6 +47,7 @@ struct ChannelDataFloat { } void print() const; + bool operator==(const ChannelDataFloat&) const = default; ClassDefNV(ChannelDataFloat, 1); }; @@ -74,8 +75,6 @@ class RecPoints } ~RecPoints() = default; - void print() const; - short getCollisionTime(int side) const { return mCollisionTime[side]; } short getCollisionTimeMean() const { return getCollisionTime(TimeMean); } short getCollisionTimeA() const { return getCollisionTime(TimeA); } @@ -96,6 +95,9 @@ class RecPoints gsl::span getBunchChannelData(const gsl::span tfdata) 
const; short static constexpr sDummyCollissionTime = 32767; + void print() const; + bool operator==(const RecPoints&) const = default; + private: std::array mCollisionTime = {sDummyCollissionTime, sDummyCollissionTime, diff --git a/DataFormats/Detectors/FIT/FT0/src/RecPoints.cxx b/DataFormats/Detectors/FIT/FT0/src/RecPoints.cxx index f580d0dd1ea8c..afd244f977f71 100644 --- a/DataFormats/Detectors/FIT/FT0/src/RecPoints.cxx +++ b/DataFormats/Detectors/FIT/FT0/src/RecPoints.cxx @@ -21,14 +21,22 @@ using namespace o2::ft0; +void ChannelDataFloat::print() const +{ + printf(" ChID% d | CFDtime=%f | QTCampl=%f QTC chain %d\n", ChId, CFDTime, QTCAmpl, ChainQTC); +} + gsl::span RecPoints::getBunchChannelData(const gsl::span tfdata) const { // extract the span of channel data for this bunch from the whole TF data return ref.getEntries() ? gsl::span(tfdata).subspan(ref.getFirstEntry(), ref.getEntries()) : gsl::span(); } -void ChannelDataFloat::print() const +void RecPoints::print() const { - - printf(" ChID% d | CFDtime=%f | QTCampl=%f QTC chain %d\n", ChId, CFDTime, QTCAmpl, ChainQTC); + LOG(info) << "RecPoint data:"; + LOG(info) << "Collision times: mean: " << getCollisionTimeMean() << ", A: " << getCollisionTimeA() << ", C: " << getCollisionTimeC(); + LOG(info) << "Vertex: " << getVertex(); + LOG(info) << "Ref first: " << ref.getFirstEntry() << ", Ref entries: " << ref.getEntries(); + LOG(info) << "Triggers: " << mTriggers.print(); } diff --git a/DataFormats/Detectors/FIT/FV0/include/DataFormatsFV0/RecPoints.h b/DataFormats/Detectors/FIT/FV0/include/DataFormatsFV0/RecPoints.h index d7ee2e67613fc..b3527fdd049d2 100644 --- a/DataFormats/Detectors/FIT/FV0/include/DataFormatsFV0/RecPoints.h +++ b/DataFormats/Detectors/FIT/FV0/include/DataFormatsFV0/RecPoints.h @@ -42,6 +42,7 @@ struct ChannelDataFloat { } void print() const; + bool operator==(const ChannelDataFloat&) const = default; ClassDefNV(ChannelDataFloat, 1); }; @@ -77,6 +78,9 @@ class RecPoints gsl::span 
getBunchChannelData(const gsl::span tfdata) const; short static constexpr sDummyCollissionTime = 32767; + void print() const; + bool operator==(const RecPoints&) const = default; + private: o2::dataformats::RangeReference mRef; o2::InteractionRecord mIntRecord; diff --git a/DataFormats/Detectors/FIT/FV0/src/RecPoints.cxx b/DataFormats/Detectors/FIT/FV0/src/RecPoints.cxx index 18f2effc281e8..ef1554acf5419 100644 --- a/DataFormats/Detectors/FIT/FV0/src/RecPoints.cxx +++ b/DataFormats/Detectors/FIT/FV0/src/RecPoints.cxx @@ -14,13 +14,22 @@ using namespace o2::fv0; -gsl::span RecPoints::getBunchChannelData(const gsl::span tfdata) const +void ChannelDataFloat::print() const { - // extract the span of channel data for this bunch from the whole TF data - return mRef.getEntries() ? gsl::span(tfdata).subspan(mRef.getFirstEntry(), mRef.getEntries()) : gsl::span(); + printf(" Channel=%d | time=%f | charge=%f | adcId=%d\n", channel, time, charge, adcId); } -void ChannelDataFloat::print() const +void RecPoints::print() const { - printf(" Channel=%d | time=%f | charge=%f | adcId=%d\n", channel, time, charge, adcId); + printf("RecPoint data:\n"); + printf("Collision times: first: %f, global mean: %f, selected mean: %f\n", getCollisionFirstTime(), getCollisionGlobalMeanTime(), getCollisionSelectedMeanTime()); + printf("Ref first: %d, Ref entries: %d\n", mRef.getFirstEntry(), mRef.getEntries()); + printf("Triggers: "); + mTriggers.print(); +} + +gsl::span RecPoints::getBunchChannelData(const gsl::span tfdata) const +{ + // extract the span of channel data for this bunch from the whole TF data + return mRef.getEntries() ?
gsl::span(tfdata).subspan(mRef.getFirstEntry(), mRef.getEntries()) : gsl::span(); } diff --git a/Detectors/FIT/FDD/workflow/CMakeLists.txt b/Detectors/FIT/FDD/workflow/CMakeLists.txt index 4f7c7f44bc31b..a4bcc6f0de6fb 100644 --- a/Detectors/FIT/FDD/workflow/CMakeLists.txt +++ b/Detectors/FIT/FDD/workflow/CMakeLists.txt @@ -52,6 +52,16 @@ o2_add_executable(flp-dpl-workflow PUBLIC_LINK_LIBRARIES O2::FDDWorkflow O2::FDDRaw O2::FITWorkflow TARGETVARNAME fddflpexe) +o2_add_executable(recpoints-reader-workflow + SOURCES src/recpoints-reader-workflow.cxx + COMPONENT_NAME fdd + PUBLIC_LINK_LIBRARIES O2::FDDWorkflow) + +o2_add_executable(recpoints-writer-workflow + SOURCES src/recpoints-writer-workflow.cxx + COMPONENT_NAME fdd + PUBLIC_LINK_LIBRARIES O2::FDDWorkflow) + o2_add_executable(integrate-cluster-workflow SOURCES src/cluster-integrator.cxx COMPONENT_NAME fdd diff --git a/Detectors/FIT/FDD/workflow/include/FDDWorkflow/RecPointReaderSpec.h b/Detectors/FIT/FDD/workflow/include/FDDWorkflow/RecPointReaderSpec.h index 500883d5badfa..6c3c9694f3e1c 100644 --- a/Detectors/FIT/FDD/workflow/include/FDDWorkflow/RecPointReaderSpec.h +++ b/Detectors/FIT/FDD/workflow/include/FDDWorkflow/RecPointReaderSpec.h @@ -31,7 +31,7 @@ namespace fdd class RecPointReader : public Task { public: - RecPointReader(bool useMC = true); + RecPointReader(bool useMC = false); ~RecPointReader() override = default; void init(InitContext& ic) final; void run(ProcessingContext& pc) final; @@ -42,7 +42,7 @@ class RecPointReader : public Task std::unique_ptr mFile; std::unique_ptr mTree; - bool mUseMC = true; // use MC truth + bool mUseMC = false; // use MC truth o2::header::DataOrigin mOrigin = o2::header::gDataOriginFDD; std::vector* mRecPoints = nullptr; diff --git a/Detectors/FIT/FDD/workflow/src/RecPointReaderSpec.cxx b/Detectors/FIT/FDD/workflow/src/RecPointReaderSpec.cxx index 9b612c31d28e6..3c4812c75b251 100644 --- a/Detectors/FIT/FDD/workflow/src/RecPointReaderSpec.cxx +++ 
b/Detectors/FIT/FDD/workflow/src/RecPointReaderSpec.cxx @@ -11,16 +11,14 @@ /// @file RecPointReaderSpec.cxx -#include - -#include "TTree.h" - -#include "Framework/ControlService.h" #include "Framework/ConfigParamRegistry.h" +#include "Framework/ControlService.h" #include "Framework/Logger.h" #include "FDDWorkflow/RecPointReaderSpec.h" #include "CommonUtils/NameConf.h" +#include + using namespace o2::framework; using namespace o2::fdd; diff --git a/Detectors/FIT/FDD/workflow/src/recpoints-reader-workflow.cxx b/Detectors/FIT/FDD/workflow/src/recpoints-reader-workflow.cxx new file mode 100644 index 0000000000000..fcef4cc46901f --- /dev/null +++ b/Detectors/FIT/FDD/workflow/src/recpoints-reader-workflow.cxx @@ -0,0 +1,57 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file recpoints-reader-workflow.cxx +/// \brief FDD RecPoints reader workflow +/// +/// \author Andreas Molander andreas.molander@cern.ch + +#include "FDDWorkflow/RecPointReaderSpec.h" + +#include "CommonUtils/ConfigurableParam.h" +#include "DetectorsRaw/HBFUtilsInitializer.h" +#include "Framework/CallbacksPolicy.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include + +using namespace o2::framework; + +void customize(std::vector& policies) +{ + o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies); +} + +// we need to add workflow options before including Framework/runDataProcessing +void customize(std::vector& workflowOptions) +{ + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; + o2::raw::HBFUtilsInitializer::addConfigOption(options); + std::swap(workflowOptions, options); +} + +#include "Framework/runDataProcessing.h" + +WorkflowSpec defineDataProcessing(const ConfigContext& ctx) +{ + o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); + bool disableMC = ctx.options().get("disable-mc"); + WorkflowSpec specs; + DataProcessorSpec producer = o2::fdd::getFDDRecPointReaderSpec(!disableMC); + specs.push_back(producer); + + // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit + o2::raw::HBFUtilsInitializer hbfIni(ctx, specs); + return specs; +} diff --git a/Detectors/FIT/FDD/workflow/src/recpoints-writer-workflow.cxx b/Detectors/FIT/FDD/workflow/src/recpoints-writer-workflow.cxx new file mode 100644 index 0000000000000..e53ccd14c30ab --- /dev/null +++ b/Detectors/FIT/FDD/workflow/src/recpoints-writer-workflow.cxx @@ -0,0 +1,47 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. 
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file recpoints-writer-workflow.cxx +/// \brief FDD RecPoints writer workflow +/// +/// \author Andreas Molander andreas.molander@cern.ch + +#include "FDDWorkflow/RecPointWriterSpec.h" + +#include "CommonUtils/ConfigurableParam.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include + +using namespace o2::framework; + +// we need to add workflow options before including Framework/runDataProcessing +void customize(std::vector& workflowOptions) +{ + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; + workflowOptions.insert(workflowOptions.end(), options.begin(), options.end()); +} + +#include "Framework/runDataProcessing.h" + +WorkflowSpec defineDataProcessing(const ConfigContext& ctx) +{ + o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); + bool disableMC = ctx.options().get("disable-mc"); + + WorkflowSpec specs; + DataProcessorSpec producer = o2::fdd::getFDDRecPointWriterSpec(!disableMC); + specs.push_back(producer); + return specs; +} diff --git a/Detectors/FIT/FT0/workflow/CMakeLists.txt b/Detectors/FIT/FT0/workflow/CMakeLists.txt index 2dbbbae41e261..123a29293e2fb 100644 --- a/Detectors/FIT/FT0/workflow/CMakeLists.txt +++ b/Detectors/FIT/FT0/workflow/CMakeLists.txt @@ -98,6 +98,11 @@ o2_add_executable(recpoints-reader-workflow COMPONENT_NAME ft0 
PUBLIC_LINK_LIBRARIES O2::FT0Workflow) +o2_add_executable(recpoints-writer-workflow + SOURCES src/recpoints-writer-workflow.cxx + COMPONENT_NAME ft0 + PUBLIC_LINK_LIBRARIES O2::FT0Workflow) + o2_add_executable(integrate-cluster-workflow SOURCES src/cluster-integrator.cxx COMPONENT_NAME ft0 diff --git a/Detectors/FIT/FT0/workflow/src/recpoints-reader-workflow.cxx b/Detectors/FIT/FT0/workflow/src/recpoints-reader-workflow.cxx index d323b4135d7ea..b1d824e10687e 100644 --- a/Detectors/FIT/FT0/workflow/src/recpoints-reader-workflow.cxx +++ b/Detectors/FIT/FT0/workflow/src/recpoints-reader-workflow.cxx @@ -9,23 +9,24 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file recpoints-reader-workflow.cxx -/// \brief Implementation of FT0 digits reader +/// \file recpoints-reader-workflow.cxx +/// \brief FT0 RecPoints reader workflow /// -/// \author ruben.shahoyan@cern.ch +/// \author ruben.shahoyan@cern.ch, Andreas Molander andreas.molander@cern.ch -#include "Framework/CallbackService.h" -#include "Framework/ControlService.h" -#include "Framework/CallbacksPolicy.h" -#include "Framework/ConfigParamRegistry.h" -#include "Framework/Task.h" -#include "FT0Workflow/RecPointReaderSpec.h" #include "CommonUtils/ConfigurableParam.h" #include "DetectorsRaw/HBFUtilsInitializer.h" +#include "Framework/CallbacksPolicy.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include "FT0Workflow/RecPointReaderSpec.h" + +#include using namespace o2::framework; -void customize(std::vector& policies) +void customize(std::vector& policies) { o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies); } @@ -33,12 +34,9 @@ void customize(std::vector& policies) // we need to add workflow options before including Framework/runDataProcessing void customize(std::vector& workflowOptions) { - // option allowing to set parameters - - std::vector options{ - {"disable-mc", 
o2::framework::VariantType::Bool, false, {"disable MC propagation even if available"}}}; - std::string keyvaluehelp("Semicolon separated key=value strings"); - options.push_back(ConfigParamSpec{"configKeyValues", VariantType::String, "", {keyvaluehelp}}); + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; o2::raw::HBFUtilsInitializer::addConfigOption(options); std::swap(workflowOptions, options); } @@ -47,10 +45,13 @@ void customize(std::vector& workflowOptions) WorkflowSpec defineDataProcessing(const ConfigContext& ctx) { - WorkflowSpec specs; o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); - DataProcessorSpec producer = o2::ft0::getRecPointReaderSpec(ctx.options().get("disable-mc")); + bool disableMC = ctx.options().get("disable-mc"); + + WorkflowSpec specs; + DataProcessorSpec producer = o2::ft0::getRecPointReaderSpec(!disableMC); specs.push_back(producer); + // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit o2::raw::HBFUtilsInitializer hbfIni(ctx, specs); return specs; diff --git a/Detectors/FIT/FT0/workflow/src/recpoints-writer-workflow.cxx b/Detectors/FIT/FT0/workflow/src/recpoints-writer-workflow.cxx new file mode 100644 index 0000000000000..c8baef76b5ccd --- /dev/null +++ b/Detectors/FIT/FT0/workflow/src/recpoints-writer-workflow.cxx @@ -0,0 +1,47 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file recpoints-writer-workflow.cxx +/// \brief FT0 RecPoints writer workflow +/// +/// \author Andreas Molander andreas.molander@cern.ch + +#include "CommonUtils/ConfigurableParam.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include "FT0Workflow/RecPointWriterSpec.h" + +#include + +using namespace o2::framework; + +// we need to add workflow options before including Framework/runDataProcessing +void customize(std::vector& workflowOptions) +{ + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; + workflowOptions.insert(workflowOptions.end(), options.begin(), options.end()); +} + +#include "Framework/runDataProcessing.h" + +WorkflowSpec defineDataProcessing(const ConfigContext& ctx) +{ + o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); + bool disableMC = ctx.options().get("disable-mc"); + + WorkflowSpec specs; + DataProcessorSpec producer = o2::ft0::getRecPointWriterSpec(!disableMC); + specs.push_back(producer); + return specs; +} diff --git a/Detectors/FIT/FV0/workflow/CMakeLists.txt b/Detectors/FIT/FV0/workflow/CMakeLists.txt index eec745d5fdf1e..a304adc61b5fd 100644 --- a/Detectors/FIT/FV0/workflow/CMakeLists.txt +++ b/Detectors/FIT/FV0/workflow/CMakeLists.txt @@ -53,6 +53,16 @@ o2_add_executable(flp-dpl-workflow PUBLIC_LINK_LIBRARIES O2::FV0Workflow O2::FITWorkflow O2::FV0Raw TARGETVARNAME fv0flpexe) +o2_add_executable(recpoints-reader-workflow + SOURCES src/recpoints-reader-workflow.cxx + COMPONENT_NAME fv0 + PUBLIC_LINK_LIBRARIES O2::FV0Workflow) + +o2_add_executable(recpoints-writer-workflow + SOURCES src/recpoints-writer-workflow.cxx 
+ COMPONENT_NAME fv0 + PUBLIC_LINK_LIBRARIES O2::FV0Workflow) + o2_add_executable(integrate-cluster-workflow SOURCES src/cluster-integrator.cxx COMPONENT_NAME fv0 diff --git a/Detectors/FIT/FV0/workflow/src/recpoints-reader-workflow.cxx b/Detectors/FIT/FV0/workflow/src/recpoints-reader-workflow.cxx new file mode 100644 index 0000000000000..ecbe89b8bbed3 --- /dev/null +++ b/Detectors/FIT/FV0/workflow/src/recpoints-reader-workflow.cxx @@ -0,0 +1,58 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file recpoints-reader-workflow.cxx +/// \brief FV0 RecPoints reader workflow +/// +/// \author Andreas Molander andreas.molander@cern.ch + +#include "CommonUtils/ConfigurableParam.h" +#include "DetectorsRaw/HBFUtilsInitializer.h" +#include "Framework/CallbacksPolicy.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include "FV0Workflow/RecPointReaderSpec.h" + +#include + +using namespace o2::framework; + +void customize(std::vector& policies) +{ + o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies); +} + +// we need to add workflow options before including Framework/runDataProcessing +void customize(std::vector& workflowOptions) +{ + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; + o2::raw::HBFUtilsInitializer::addConfigOption(options); + std::swap(workflowOptions, options); +} + +#include "Framework/runDataProcessing.h" + +WorkflowSpec defineDataProcessing(const ConfigContext& ctx) +{ + o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); + bool disableMC = ctx.options().get("disable-mc"); + + WorkflowSpec specs; + DataProcessorSpec producer = o2::fv0::getRecPointReaderSpec(!disableMC); + specs.push_back(producer); + + // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit + o2::raw::HBFUtilsInitializer hbfIni(ctx, specs); + return specs; +} diff --git a/Detectors/FIT/FV0/workflow/src/recpoints-writer-workflow.cxx b/Detectors/FIT/FV0/workflow/src/recpoints-writer-workflow.cxx new file mode 100644 index 0000000000000..0fd3bd3bef2e8 --- /dev/null +++ b/Detectors/FIT/FV0/workflow/src/recpoints-writer-workflow.cxx @@ -0,0 +1,47 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file recpoints-writer-workflow.cxx +/// \brief FV0 RecPoints writer workflow +/// +/// \author Andreas Molander andreas.molander@cern.ch + +#include "FV0Workflow/RecPointWriterSpec.h" + +#include "CommonUtils/ConfigurableParam.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include + +using namespace o2::framework; + +// we need to add workflow options before including Framework/runDataProcessing +void customize(std::vector& workflowOptions) +{ + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; + workflowOptions.insert(workflowOptions.end(), options.begin(), options.end()); +} + +#include "Framework/runDataProcessing.h" + +WorkflowSpec defineDataProcessing(const ConfigContext& ctx) +{ + o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); + bool disableMC = ctx.options().get("disable-mc"); + + WorkflowSpec specs; + DataProcessorSpec producer = o2::fv0::getRecPointWriterSpec(!disableMC); + specs.push_back(producer); + return specs; +} diff --git a/Detectors/FIT/macros/CMakeLists.txt b/Detectors/FIT/macros/CMakeLists.txt index 81f2cc05e0b25..e7debb4184325 100644 --- a/Detectors/FIT/macros/CMakeLists.txt +++ b/Detectors/FIT/macros/CMakeLists.txt @@ -40,5 +40,10 @@ o2_add_test_root_macro(readFITDCSdata.C O2::CCDB LABELS fit) +o2_add_test_root_macro(compareRecPoints.C + PUBLIC_LINK_LIBRARIES O2::DataFormatsFT0 + O2::DataFormatsFIT + LABELS fit) + 
o2_data_file(COPY readFITDCSdata.C DESTINATION Detectors/FIT/macros/) o2_data_file(COPY readFITDeadChannelMap.C DESTINATION Detectors/FIT/macros/) \ No newline at end of file diff --git a/Detectors/FIT/macros/compareRecPoints.C b/Detectors/FIT/macros/compareRecPoints.C new file mode 100644 index 0000000000000..0ce077bc616ba --- /dev/null +++ b/Detectors/FIT/macros/compareRecPoints.C @@ -0,0 +1,110 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file compareRecPoints.C +/// \brief ROOT macro to compare two trees with RecPoints +/// +/// \author Artur Furs artur.furs@cern.ch, Andreas Molander andreas.molander@cern.ch + +#if !defined(__CLING__) || defined(__ROOTCLING__) +#include "DataFormatsFT0/RecPoints.h" +#include "DataFormatsFV0/RecPoints.h" +#include "DataFormatsFDD/RecPoint.h" + +#include "TFile.h" +#include "TTree.h" + +#include +#include +#include +#include +#endif + +void compareRecPoints(std::string filename1, std::string filename2) +{ + std::unique_ptr file1(TFile::Open(filename1.c_str(), "READ")); + TTree* tree1 = (TTree*)file1->Get("o2sim"); + + std::unique_ptr file2(TFile::Open(filename2.c_str(), "READ")); + TTree* tree2 = (TTree*)file2->Get("o2sim"); + + if (tree1->GetEntries() != tree2->GetEntries()) { + std::cout << "Non equal number of entries in trees!" 
<< std::endl; + return; + } + + typedef typename o2::ft0::RecPoints RecPoint; + typedef typename o2::ft0::ChannelDataFloat ChannelDataFloat; + + std::vector vecRecPoints1; + std::vector* ptrVecRecPoints1 = &vecRecPoints1; + + std::vector vecChannelDataFloat1; + std::vector* ptrVecChannelDataFloat1 = &vecChannelDataFloat1; + + tree1->SetBranchAddress("FT0Cluster", &ptrVecRecPoints1); + tree1->SetBranchAddress("FT0RecChData", &ptrVecChannelDataFloat1); + + std::vector vecRecPoints2; + std::vector* ptrVecRecPoints2 = &vecRecPoints2; + + std::vector vecChannelDataFloat2; + std::vector* ptrVecChannelDataFloat2 = &vecChannelDataFloat2; + + tree2->SetBranchAddress("FT0Cluster", &ptrVecRecPoints2); + tree2->SetBranchAddress("FT0RecChData", &ptrVecChannelDataFloat2); + + for (int iEntry = 0; iEntry < tree1->GetEntries(); iEntry++) { + tree1->GetEntry(iEntry); + tree2->GetEntry(iEntry); + + if (vecRecPoints1 != vecRecPoints2) { + std::cout << "Non equal RecPoints vector!" << std::endl; + + if (vecRecPoints1.size() == vecRecPoints2.size()) { + for (int iEvent = 0; iEvent < vecRecPoints1.size(); iEvent++) { + const auto& recPoint1 = vecRecPoints1[iEvent]; + const auto& recPoint2 = vecRecPoints2[iEvent]; + + if (!(recPoint1 == recPoint2)) { + std::cout << "First RecPoint" << std::endl; + recPoint1.print(); + std::cout << "Second RecPoint" << std::endl; + recPoint2.print(); + } + } + } else { + std::cout << "Non equal number of RecPoints!" << std::endl; + } + } + if (vecChannelDataFloat1 != vecChannelDataFloat2) { + std::cout << "Non equal ChannelDataFloat vector!" 
<< std::endl; + + if (vecChannelDataFloat1.size() == vecChannelDataFloat2.size()) { + for (int iEvent = 0; iEvent < vecChannelDataFloat1.size(); iEvent++) { + const auto& channelDataFloat1 = vecChannelDataFloat1[iEvent]; + const auto& channelDataFloat2 = vecChannelDataFloat2[iEvent]; + + if (!(channelDataFloat1 == channelDataFloat2)) { + std::cout << "First ChannelDataFloat" << std::endl; + channelDataFloat1.print(); + std::cout << "Second ChannelDataFloat" << std::endl; + channelDataFloat2.print(); + } + } + } else { + std::cout << "Non equal number of ChannelDataFloat!" << std::endl; + } + } + } + + return; +} \ No newline at end of file From 95d50444d9d733b58f8f7b4cc03c43bcefb64316 Mon Sep 17 00:00:00 2001 From: swenzel Date: Tue, 18 Mar 2025 17:04:26 +0100 Subject: [PATCH 0098/1764] Fix BC filling in per TF digi contexts --- DataFormats/simulation/src/DigitizationContext.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/DataFormats/simulation/src/DigitizationContext.cxx b/DataFormats/simulation/src/DigitizationContext.cxx index e875cb61951a9..499a478bc5148 100644 --- a/DataFormats/simulation/src/DigitizationContext.cxx +++ b/DataFormats/simulation/src/DigitizationContext.cxx @@ -679,6 +679,7 @@ DigitizationContext DigitizationContext::extractSingleTimeframe(int timeframeid, } r.mSimPrefixes = mSimPrefixes; r.mMuBC = mMuBC; + r.mBCFilling = mBCFilling; try { auto tf_ranges = timeframeindices.at(timeframeid); From dcb02ef906d422fcdaae4dff21e335f6e8bba914 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 10:21:46 +0100 Subject: [PATCH 0099/1764] GPU Display: suppress compiler warning with sanitizers enabled --- GPU/GPUTracking/display/GPUDisplay.h | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 73f65b6b24241..bb270cda23565 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -161,6 +161,7 @@ class GPUDisplay : public 
GPUDisplayInterface { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-security" +#pragma GCC diagnostic ignored "-Wformat-truncation" snprintf(mInfoText2, 1024, args...); #pragma GCC diagnostic pop GPUInfo("%s", mInfoText2); From c903618bcbda071dbd6e1538276176d1197bc6fc Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 10:22:09 +0100 Subject: [PATCH 0100/1764] GPU Standalone can compile with sanitizers also without debug build --- GPU/GPUTracking/Standalone/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index b9620b9385c73..de245a71845c3 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -50,9 +50,6 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(GPUCA_BUILD_DEBUG) set(CMAKE_CXX_FLAGS "-O0 -ggdb") - if (GPUCA_BUILD_DEBUG_SANITIZE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang - endif() set(CMAKE_BUILD_TYPE DEBUG) else() set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb -minline-all-stringops -funroll-loops -fno-stack-protector") @@ -67,6 +64,9 @@ else() set(CMAKE_BUILD_TYPE RELEASE) add_definitions(-DNDEBUG) endif() +if (GPUCA_BUILD_DEBUG_SANITIZE) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang +endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") From 5b6c4855a4a38b1960186dd0d64ea3c72220ed3d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:05:52 +0100 Subject: [PATCH 0101/1764] GPU: Disable clang warnings when using C variable 
length array extension in C++ --- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index de245a71845c3..8fa8f0c2b68c9 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -67,7 +67,7 @@ endif() if (GPUCA_BUILD_DEBUG_SANITIZE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -Wno-vla-cxx-extension") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") # Find mandatory packages From 7a68fc21493361c9f9acb79d6411f60d6d8b1d88 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:20:26 +0100 Subject: [PATCH 0102/1764] GPU Standalone: Fix build using clang compiler with sanitizers --- GPU/GPUTracking/Standalone/CMakeLists.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 8fa8f0c2b68c9..6e536727a0c67 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -65,7 +65,10 @@ else() add_definitions(-DNDEBUG) endif() if (GPUCA_BUILD_DEBUG_SANITIZE) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") + if(CMAKE_CXX_COMPILER 
MATCHES "clang\\+\\+") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") + endif() endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -Wno-vla-cxx-extension") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") @@ -247,6 +250,13 @@ if(GPUCA_CONFIG_ROOT) endif() target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) +if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) + get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" DIRECTORY) + get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" ABSOLUTE) + target_link_options(ca PRIVATE "-Wl,-rpath,${CLANG_ASAN_SO_PATH}") +endif() + # Installation install(TARGETS ca TPCFastTransformation standalone_support) install(FILES "cmake/makefile" DESTINATION "${CMAKE_INSTALL_PREFIX}") From 1eebd48731f33ec692a7d866908abcb9b498a4f1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:38:06 +0100 Subject: [PATCH 0103/1764] GPU: Suppress another clang warning --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index a4074282da30f..a8a83fdbd9203 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -112,7 +112,12 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args) { +#pragma GCC diagnostic push +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not alway 
captured below +#endif std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); +#pragma GCC diagnostic pop } template From ce9dd396d2e570fb8897a351c588bedac11a2e1c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 13:11:30 +0100 Subject: [PATCH 0104/1764] GPU: Fix some minor issues indicated by clang sanitizer --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 22 +++++++++++++------ .../Global/GPUChainTrackingSectorTracker.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 11 +++++----- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 35e44d99d5c0c..8bae1df267412 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -147,8 +147,12 @@ int32_t GPUReconstruction::Init() if (InitDevice()) { return 1; } - mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; - mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; + if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; + mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; + } else { + mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr; + } if (InitPhasePermanentMemory()) { return 1; } @@ -860,14 +864,18 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs) FreeRegisteredMemory(i); } } - mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); - mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); mUnmanagedChunks.clear(); - mVolatileMemoryStart = nullptr; mNonPersistentMemoryStack.clear(); mNonPersistentIndividualAllocations.clear(); - mHostMemoryPoolEnd = mHostMemoryPoolBlocked ?
mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); - mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize); + mVolatileMemoryStart = nullptr; + if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); + mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); + mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); + mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize); + } else { + mHostMemoryPool = mDeviceMemoryPool = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr; + } } void GPUReconstruction::UpdateMaxMemoryUsed() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index dd71a797f2744..e161f74a31032 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -150,7 +150,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); - mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); + mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, doGPU && param().rec.tpc.occupancyMapTimeBins ? 
mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); } int32_t streamMap[NSECTORS]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f373d56ea0395..eb1df3f37b6b5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1706,20 +1706,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = nFilteredHits; } - uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); + const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) { raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters); CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); continue; } - GPUTPCGMMergedTrackHit* cl = mClusters + iOutTrackFirstCluster; - GPUTPCGMMergedTrackHitXYZ* clXYZ = mClustersXYZ + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster; for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1760,7 +1760,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? 
mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1791,12 +1791,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (Param().rec.tpc.mergeCE) { bool CEside; if (Param().par.earlyTpcTransform) { + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); } else { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], &clXYZ[CEside ? (nHits - 1) : 0], iOutputTrack); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); } } // itr } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 3bd2257d02e01..d235b3398c062 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -1091,7 +1091,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr GPUTPCGMTrackParam t = track.Param(); float Alpha = track.Alpha(); CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt()); - bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->ClustersXYZ() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); + bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->Param().par.earlyTpcTransform ? 
merger->ClustersXYZ() + track.FirstClusterRef() : nullptr, nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); CADEBUG(printf("Finished Fit Track %d\n", iTrk)); CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); From 0064a3af79e6f92ee99725cf7a2d024b667bac2e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 13:28:56 +0100 Subject: [PATCH 0105/1764] With -ffast-math, std::finite is UB and one should assume all float to be finite --- Common/MathUtils/include/MathUtils/detail/basicMath.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Common/MathUtils/include/MathUtils/detail/basicMath.h b/Common/MathUtils/include/MathUtils/detail/basicMath.h index 3fc3fe374b380..3565764435a68 100644 --- a/Common/MathUtils/include/MathUtils/detail/basicMath.h +++ b/Common/MathUtils/include/MathUtils/detail/basicMath.h @@ -113,7 +113,11 @@ GPUdi() int nint(double x) template <> GPUdi() bool finite(double x) { +#ifdef __FAST_MATH__ + return true; // with fast-math every value has to be assumed finite +#else return std::isfinite(x); +#endif } template <> GPUdi() double log(double x) From cb5e20f94f5717e0c00aa33d5f40f23f40dcb1d4 Mon Sep 17 00:00:00 2001 From: lietava Date: Tue, 18 Mar 2025 16:03:48 +0100 Subject: [PATCH 0106/1764] fix: debug removed --- Detectors/CTP/workflowScalers/src/RunManager.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Detectors/CTP/workflowScalers/src/RunManager.cxx b/Detectors/CTP/workflowScalers/src/RunManager.cxx index 9af5b5e104120..ac3eda60094e9 100644 --- a/Detectors/CTP/workflowScalers/src/RunManager.cxx +++ b/Detectors/CTP/workflowScalers/src/RunManager.cxx @@ -126,10 +126,9 @@ int CTPRunManager::loadRun(const std::string& cfg) } int CTPRunManager::setRunConfigBK(uint32_t runNumber, const std::string& cfg) { - std::cout << "Printing cfg:"
<< cfg << std::endl; + std::cout << "Printing run:" << runNumber << " cfg:" << cfg << std::endl; if (mBKClient) { try { - uint32_t runNumber = 1; mBKClient->run()->setRawCtpTriggerConfiguration(runNumber, cfg); } catch (std::runtime_error& error) { std::cerr << "An error occurred: " << error.what() << std::endl; From c71128566f99d77103251fd8e07c2d9e4ba5a947 Mon Sep 17 00:00:00 2001 From: Maurice Coquet Date: Tue, 18 Mar 2025 16:05:11 +0100 Subject: [PATCH 0107/1764] Fixing CCDB host address for MFT noise scan --- Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx b/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx index 617e02ebb9d9c..a34d8cc5f2975 100644 --- a/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx +++ b/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx @@ -59,7 +59,7 @@ void NoiseCalibratorSpec::init(InitContext& ic) mPathDcs = ic.options().get("path-DCS"); mOutputType = ic.options().get("send-to-server"); mNoiseMapForDcs.clear(); - api.init("http://alice-ccdb.cern.ch"); + api.init(o2::base::NameConf::getCCDBServer()); } void NoiseCalibratorSpec::run(ProcessingContext& pc) From ad59c78eed8256990430777517aa6f4ca89cd954 Mon Sep 17 00:00:00 2001 From: pillot Date: Sat, 15 Mar 2025 10:29:56 +0100 Subject: [PATCH 0108/1764] turn error into warning and continue the scan --- Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx index 236effc4b1182..ab716bc6eb752 100644 --- a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx @@ -479,8 +479,7 @@ void fillDataPoints(const std::vector& dps, std::map& d auto previousTS = dps2.rbegin()->first; if (ts != 
previousTS || getValue(*itDP) != dps2.rbegin()->second) { if (ts <= previousTS) { - printf("error: wrong data point order (%llu <= %llu)\n", (ULL)ts, (ULL)previousTS); - exit(1); + printf("\e[0;31mwarning: wrong data point order (%llu <= %llu)\e[0m\n", (ULL)ts, (ULL)previousTS); } if (printWarning) { printf("%s%s missing the previous data point (dt = %s%llu ms)", color.c_str(), header.c_str(), @@ -512,8 +511,9 @@ void fillDataPoints(const std::vector& dps, std::map& d for (++itDP; itDP < dps.end(); ++itDP) { ts = itDP->get_epoch_time(); if (ts <= previousTS) { - printf("error: wrong data point order (%llu <= %llu)\n", (ULL)ts, (ULL)previousTS); - exit(1); + printf("\e[0;31mwarning: wrong data point order (%llu <= %llu)\e[0m\n", (ULL)ts, (ULL)previousTS); + } else { + previousTS = ts; } if (ts < tMin && (warningLevel > 1 || (warningLevel == 1 && ts + tolerance < tMin))) { printf("%s%s data point outside of file validity range (dt = -%llu ms)\e[0m\n", @@ -523,7 +523,6 @@ void fillDataPoints(const std::vector& dps, std::map& d header.c_str(), (ULL)(ts - tMax)); } dps2.emplace(ts, getValue(*itDP)); - previousTS = ts; } } From 1b5b9b672f89afd25af46309bce2147da41b685b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barth=C3=A9l=C3=A9my=20von=20Haller?= Date: Fri, 14 Mar 2025 16:30:49 +0100 Subject: [PATCH 0109/1764] Avoid fatal in CTPRateFetcher --- DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx index 43fa9dbe7f3f3..67e59aad3ea24 100644 --- a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx +++ b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx @@ -46,7 +46,8 @@ double CTPRateFetcher::fetchNoPuCorr(o2::ccdb::BasicCCDBManager* ccdb, uint64_t LOG(info) << "Trying different class"; ret = fetchCTPratesClassesNoPuCorr(timeStamp, "CMTVX-NONE"); if (ret < 0) { - LOG(fatal) << "None of the classes used for 
lumi found"; + LOG(error) << "None of the classes used for lumi found"; + return -1.; } } return ret; @@ -245,17 +246,19 @@ void CTPRateFetcher::setupRun(int runNumber, o2::ccdb::BasicCCDBManager* ccdb, u return; } mRunNumber = runNumber; - LOG(info) << "Setting up CTP scalers for run " << mRunNumber; + LOG(info) << "Setting up CTP scalers for run " << mRunNumber << " and timestamp : " << timeStamp; auto ptrLHCIFdata = ccdb->getSpecific("GLO/Config/GRPLHCIF", timeStamp); if (ptrLHCIFdata == nullptr) { - LOG(fatal) << "GRPLHCIFData not in database, timestamp:" << timeStamp; + LOG(error) << "GRPLHCIFData not in database, timestamp:" << timeStamp; + return; } mLHCIFdata = *ptrLHCIFdata; std::map metadata; metadata["runNumber"] = std::to_string(mRunNumber); auto ptrConfig = ccdb->getSpecific("CTP/Config/Config", timeStamp, metadata); if (ptrConfig == nullptr) { - LOG(fatal) << "CTPRunConfig not in database, timestamp:" << timeStamp; + LOG(error) << "CTPRunConfig not in database, timestamp:" << timeStamp; + return; } mConfig = *ptrConfig; if (initScalers) { @@ -264,7 +267,7 @@ void CTPRateFetcher::setupRun(int runNumber, o2::ccdb::BasicCCDBManager* ccdb, u mScalers = *ptrScalers; mScalers.convertRawToO2(); } else { - LOG(fatal) << "CTPRunScalers not in database, timestamp:" << timeStamp; + LOG(error) << "CTPRunScalers not in database, timestamp:" << timeStamp; } } } From af7c9d3a03dcf99e6ee587c311c05494cec27a35 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 19 Mar 2025 22:30:19 +0100 Subject: [PATCH 0110/1764] GPU Standalone: Simplify CMake script --- .../Standalone/Benchmark/CMakeLists.txt | 4 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 70 +++++++------------ GPU/GPUTracking/display/CMakeLists.txt | 3 + 3 files changed, 32 insertions(+), 45 deletions(-) diff --git a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt index fe583be02fe6b..de2d683036193 100644 --- 
a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt @@ -22,8 +22,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") TARGETVARNAME targetName PUBLIC_LINK_LIBRARIES O2::GPUO2Interface O2::GPUTracking SOURCES ${SRCS}) - - target_compile_definitions(${targetName} PUBLIC $) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -31,6 +29,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName ca) endif() +target_compile_definitions(${targetName} PUBLIC $) + if(ROOT_FOUND) target_sources(${targetName} PRIVATE ../../qa/genEvents.cxx) endif() diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 6e536727a0c67..5ed511173f53e 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -187,40 +187,33 @@ include_directories(${GPU_DIR}/Common # Create main targets add_subdirectory(../../ GPU) add_library(standalone_support SHARED ${GPUTRACKING_DIR}/utils/EmptyFile.cxx) +target_compile_definitions(standalone_support PUBLIC $) -target_link_libraries(GPUTracking PUBLIC TPCFastTransformation standalone_support dl) +target_link_libraries(standalone_support PUBLIC# + dl + pthread + Microsoft.GSL::GSL + TPCFastTransformation) +target_link_libraries(GPUTracking PUBLIC TPCFastTransformation standalone_support) target_link_libraries(ca PUBLIC GPUTracking) -target_link_libraries(standalone_support PUBLIC dl pthread) -target_compile_definitions(ca PUBLIC $) -target_compile_definitions(standalone_support PUBLIC $) # Add all sources and dependencies to to support based on Config File target_sources(standalone_support PRIVATE - ${O2_DIR}/Common/Field/src/MagFieldFast.cxx - ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx - ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - 
${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx - ${O2_DIR}/Detectors/Base/src/Ray.cxx - ${O2_DIR}/Detectors/Base/src/Propagator.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) -if(CONFIG_O2_ITS_TRAITS) - target_sources(standalone_support PRIVATE - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/PrimaryVertexContext.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/ClusterLines.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/TrackerTraitsCPU.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/ROframe.cxx) - target_link_libraries(standalone_support PUBLIC Boost::boost) -endif() + ${O2_DIR}/Common/Field/src/MagFieldFast.cxx + ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx + ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx + ${O2_DIR}/Detectors/Base/src/Ray.cxx + ${O2_DIR}/Detectors/Base/src/Propagator.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) if(GPUCA_CONFIG_FMT) 
target_link_libraries(standalone_support PUBLIC fmt::fmt) @@ -232,23 +225,14 @@ if(GPUCA_CONFIG_VC) target_link_libraries(TPCFastTransformation PUBLIC Vc::Vc) endif() -if(GPUCA_BUILD_EVENT_DISPLAY) - if(GPUCA_CONFIG_GL3W) - target_sources(GPUTrackingDisplay PRIVATE ${GPUTRACKING_DIR}/display/3rdparty/gl3w.c) - endif() - target_sources(GPUTracking PRIVATE ${GPUTRACKING_DIR}/display/3rdparty/HandMadeMath/HandMadeMathImpl.cxx) - target_include_directories(GPUTracking SYSTEM PUBLIC ${GPUTRACKING_DIR}/display/3rdparty) -endif() - if(GPUCA_CONFIG_ROOT) target_link_libraries(standalone_support PUBLIC - ROOT::Core - ROOT::RIO - ROOT::Hist - ROOT::Gui - ROOT::Tree) + ROOT::Core + ROOT::RIO + ROOT::Hist + ROOT::Gui + ROOT::Tree) endif() -target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index 68385d7916234..b3107dbec8c79 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -179,9 +179,12 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") endif() if(GPUCA_CONFIG_GL3W) target_compile_definitions(${targetName} PRIVATE GPUCA_DISPLAY_GL3W) + target_sources(${targetName} PRIVATE ${GPUTRACKING_DIR}/display/3rdparty/gl3w.c) else() target_link_libraries(${targetName} PRIVATE ${GLEW_LIBRARIES}) endif() + target_sources(${targetName} PRIVATE ${GPUTRACKING_DIR}/display/3rdparty/HandMadeMath/HandMadeMathImpl.cxx) + target_include_directories(${targetName} SYSTEM PUBLIC ${GPUTRACKING_DIR}/display/3rdparty) endif() if (OPENGL_GLU_FOUND) From 9046e703929252511633b3ac75a06f618802e2b3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 19 Mar 2025 22:35:24 +0100 Subject: [PATCH 
0111/1764] GPU Standalone: trap on FPE by default only when not using ffast-math --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 40a7fc71cbb4d..a1f650a2bc56e 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -521,7 +521,7 @@ AddOption(outputcontrolmem, uint64_t, 0, "outputMemory", 0, "Use predefined outp AddOption(inputcontrolmem, uint64_t, 0, "inputMemory", 0, "Use predefined input buffer of this size", min(0ul), message("Using %s bytes as input memory")) AddOption(cpuAffinity, int32_t, -1, "", 0, "Pin CPU affinity to this CPU core", min(-1)) AddOption(fifoScheduler, bool, false, "", 0, "Use FIFO realtime scheduler", message("Setting FIFO scheduler: %s")) -AddOption(fpe, bool, true, "", 0, "Trap on floating point exceptions") +AddOption(fpe, int8_t, -1, "", 0, "Trap on floating point exceptions (-1 = if no ffast-math)") AddOption(flushDenormals, bool, true, "", 0, "Enable FTZ and DAZ (Flush all denormals to zero)") AddOption(solenoidBzNominalGPU, float, -1e6f, "", 0, "Field strength of solenoid Bz in kGaus") AddOption(constBz, bool, false, "", 0, "Force constand Bz") diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index d6279df7c9188..e9cb7c5179c59 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -142,7 +142,11 @@ int32_t ReadConfiguration(int argc, char** argv) return 1; } } +#ifdef __FAST_MATH__ + if (configStandalone.fpe == 1) { +#else if (configStandalone.fpe) { +#endif feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW); } if (configStandalone.flushDenormals) { @@ -158,7 +162,7 @@ int32_t ReadConfiguration(int argc, 
char** argv) printf("FIFO Scheduler setting not supported on Windows\n"); return 1; } - if (configStandalone.fpe) { + if (configStandalone.fpe == 1) { printf("FPE not supported on Windows\n"); return 1; } From 90267bb6455493078cb2e97139d7d8443320034f Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 20 Mar 2025 08:36:09 +0100 Subject: [PATCH 0112/1764] DPL: enable early forwarding for AODs (#14088) Should improve parallelism for long trains. Requires FairMQ 1.9.2 and one needs to pass `--early-forwarding-policy always` for this to take effect. --- .../Framework/CompletionPolicyHelpers.h | 5 ++++ Framework/Core/include/Framework/InputSpan.h | 15 +++++++++++- Framework/Core/src/CompletionPolicy.cxx | 4 ++++ .../Core/src/CompletionPolicyHelpers.cxx | 18 ++++++++++++++ Framework/Core/src/DataProcessingDevice.cxx | 15 +++++++++++- Framework/Core/src/DataRelayer.cxx | 24 +++++++++++++++++-- Framework/Core/src/InputSpan.cxx | 7 ++++-- .../Core/test/test_InputRecordWalker.cxx | 2 +- Framework/Core/test/test_InputSpan.cxx | 2 +- Framework/Utils/test/RawPageTestData.h | 6 +++-- 10 files changed, 88 insertions(+), 10 deletions(-) diff --git a/Framework/Core/include/Framework/CompletionPolicyHelpers.h b/Framework/Core/include/Framework/CompletionPolicyHelpers.h index 547add44560ea..aa336d040d30d 100644 --- a/Framework/Core/include/Framework/CompletionPolicyHelpers.h +++ b/Framework/Core/include/Framework/CompletionPolicyHelpers.h @@ -43,6 +43,11 @@ struct CompletionPolicyHelpers { /// When any of the parts of the record have been received, consume them. static CompletionPolicy consumeWhenAny(const char* name, CompletionPolicy::Matcher matcher); + +#if __has_include() + /// When any of the parts which has arrived has a refcount of 1. 
+ static CompletionPolicy consumeWhenAnyZeroCount(const char* name, CompletionPolicy::Matcher matcher); +#endif /// Default matcher applies for all devices static CompletionPolicy consumeWhenAny(CompletionPolicy::Matcher matcher = [](auto const&) -> bool { return true; }) { diff --git a/Framework/Core/include/Framework/InputSpan.h b/Framework/Core/include/Framework/InputSpan.h index c435276c7134f..cf8c8acda6796 100644 --- a/Framework/Core/include/Framework/InputSpan.h +++ b/Framework/Core/include/Framework/InputSpan.h @@ -46,7 +46,7 @@ class InputSpan /// index and the buffer associated. /// @nofPartsGetter is the getter for the number of parts associated with an index /// @a size is the number of elements in the span. - InputSpan(std::function getter, std::function nofPartsGetter, size_t size); + InputSpan(std::function getter, std::function nofPartsGetter, std::function refCountGetter, size_t size); /// @a i-th element of the InputSpan [[nodiscard]] DataRef get(size_t i, size_t partidx = 0) const @@ -66,6 +66,18 @@ class InputSpan return mNofPartsGetter(i); } + // Get the refcount for a given part + [[nodiscard]] int getRefCount(size_t i) const + { + if (i >= mSize) { + return 0; + } + if (!mRefCountGetter) { + return -1; + } + return mRefCountGetter(i); + } + /// Number of elements in the InputSpan [[nodiscard]] size_t size() const { @@ -236,6 +248,7 @@ class InputSpan private: std::function mGetter; std::function mNofPartsGetter; + std::function mRefCountGetter; size_t mSize; }; diff --git a/Framework/Core/src/CompletionPolicy.cxx b/Framework/Core/src/CompletionPolicy.cxx index 9d92fd07e6f5a..ec8997e32c5db 100644 --- a/Framework/Core/src/CompletionPolicy.cxx +++ b/Framework/Core/src/CompletionPolicy.cxx @@ -26,7 +26,11 @@ std::vector { return { CompletionPolicyHelpers::consumeWhenAllOrdered("internal-dpl-aod-writer"), +#if __has_include() + CompletionPolicyHelpers::consumeWhenAnyZeroCount("internal-dpl-injected-dummy-sink", [](DeviceSpec const& s) { return 
s.name.find("internal-dpl-injected-dummy-sink") != std::string::npos; }), +#else CompletionPolicyHelpers::consumeWhenAny("internal-dpl-injected-dummy-sink", [](DeviceSpec const& s) { return s.name.find("internal-dpl-injected-dummy-sink") != std::string::npos; }), +#endif CompletionPolicyHelpers::consumeWhenAll()}; } diff --git a/Framework/Core/src/CompletionPolicyHelpers.cxx b/Framework/Core/src/CompletionPolicyHelpers.cxx index 9dd895a6fed6d..e682f9a7c7dd6 100644 --- a/Framework/Core/src/CompletionPolicyHelpers.cxx +++ b/Framework/Core/src/CompletionPolicyHelpers.cxx @@ -19,6 +19,9 @@ #include "Framework/TimingInfo.h" #include "DecongestionService.h" #include "Framework/Signpost.h" +#if __has_include() +#include +#endif #include #include @@ -249,6 +252,21 @@ CompletionPolicy CompletionPolicyHelpers::consumeExistingWhenAny(const char* nam }}; } +#if __has_include() +CompletionPolicy CompletionPolicyHelpers::consumeWhenAnyZeroCount(const char* name, CompletionPolicy::Matcher matcher) +{ + auto callback = [](InputSpan const& inputs, std::vector const&, ServiceRegistryRef& ref) -> CompletionPolicy::CompletionOp { + for (size_t i = 0; i < inputs.size(); ++i) { + if (inputs.get(i).header != nullptr && inputs.getRefCount(i) == 1) { + return CompletionPolicy::CompletionOp::Consume; + } + } + return CompletionPolicy::CompletionOp::Wait; + }; + return CompletionPolicy{name, matcher, callback, false}; +} +#endif + CompletionPolicy CompletionPolicyHelpers::consumeWhenAny(const char* name, CompletionPolicy::Matcher matcher) { auto callback = [](InputSpan const& inputs, std::vector const&, ServiceRegistryRef& ref) -> CompletionPolicy::CompletionOp { diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index 7f42805cfdb1e..ae25d8d3a915c 100644 --- a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -57,6 +57,9 @@ #include #include #include +#if __has_include() +#include +#endif 
#include #include #include @@ -1214,12 +1217,14 @@ void DataProcessingDevice::fillContext(DataProcessorContext& context, DeviceCont if (forwarded.matcher.lifetime != Lifetime::Condition) { onlyConditions = false; } +#if !__has_include() if (strncmp(DataSpecUtils::asConcreteOrigin(forwarded.matcher).str, "AOD", 3) == 0) { context.canForwardEarly = false; overriddenEarlyForward = true; LOG(detail) << "Cannot forward early because of AOD input: " << DataSpecUtils::describe(forwarded.matcher); break; } +#endif if (DataSpecUtils::partialMatch(forwarded.matcher, o2::header::DataDescription{"RAWDATA"}) && mProcessingPolicies.earlyForward == EarlyForwardPolicy::NORAW) { context.canForwardEarly = false; overriddenEarlyForward = true; @@ -2230,7 +2235,15 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v auto nofPartsGetter = [¤tSetOfInputs](size_t i) -> size_t { return currentSetOfInputs[i].getNumberOfPairs(); }; - return InputSpan{getter, nofPartsGetter, currentSetOfInputs.size()}; +#if __has_include() + auto refCountGetter = [¤tSetOfInputs](size_t idx) -> int { + auto& header = static_cast(*currentSetOfInputs[idx].header(0)); + return header.GetRefCount(); + }; +#else + std::function refCountGetter = nullptr; +#endif + return InputSpan{getter, nofPartsGetter, refCountGetter, currentSetOfInputs.size()}; }; auto markInputsAsDone = [ref](TimesliceSlot slot) -> void { diff --git a/Framework/Core/src/DataRelayer.cxx b/Framework/Core/src/DataRelayer.cxx index 385d9a6c50c4a..f30866dc0aa1b 100644 --- a/Framework/Core/src/DataRelayer.cxx +++ b/Framework/Core/src/DataRelayer.cxx @@ -44,6 +44,10 @@ #include #include +#include +#if __has_include() +#include +#endif #include #include #include @@ -209,7 +213,15 @@ DataRelayer::ActivityStats DataRelayer::processDanglingInputs(std::vector(partial.size())}; +#if __has_include() + auto refCountGetter = [&partial](size_t idx) -> int { + auto& header = static_cast(*partial[idx].header(0)); + return 
header.GetRefCount(); + }; +#else + std::function refCountGetter = nullptr; +#endif + InputSpan span{getter, nPartsGetter, refCountGetter, static_cast(partial.size())}; // Setup the input span if (expirator.checker(services, timestamp.value, span) == false) { @@ -755,7 +767,15 @@ void DataRelayer::getReadyToProcess(std::vector& comp auto nPartsGetter = [&partial](size_t idx) { return partial[idx].size(); }; - InputSpan span{getter, nPartsGetter, static_cast(partial.size())}; +#if __has_include() + auto refCountGetter = [&partial](size_t idx) -> int { + auto& header = static_cast(*partial[idx].header(0)); + return header.GetRefCount(); + }; +#else + std::function refCountGetter = nullptr; +#endif + InputSpan span{getter, nPartsGetter, refCountGetter, static_cast(partial.size())}; CompletionPolicy::CompletionOp action = mCompletionPolicy.callbackFull(span, mInputs, mContext); auto& variables = mTimesliceIndex.getVariablesForSlot(slot); diff --git a/Framework/Core/src/InputSpan.cxx b/Framework/Core/src/InputSpan.cxx index 510b55cd0b9b9..d1dffc85602a5 100644 --- a/Framework/Core/src/InputSpan.cxx +++ b/Framework/Core/src/InputSpan.cxx @@ -29,8 +29,11 @@ InputSpan::InputSpan(std::function getter, size_t size) { } -InputSpan::InputSpan(std::function getter, std::function nofPartsGetter, size_t size) - : mGetter{getter}, mNofPartsGetter{nofPartsGetter}, mSize{size} +InputSpan::InputSpan(std::function getter, + std::function nofPartsGetter, + std::function refCountGetter, + size_t size) + : mGetter{getter}, mNofPartsGetter{nofPartsGetter}, mRefCountGetter(refCountGetter), mSize{size} { } diff --git a/Framework/Core/test/test_InputRecordWalker.cxx b/Framework/Core/test/test_InputRecordWalker.cxx index 5b9004a1a9366..9af3c0dd2dbe2 100644 --- a/Framework/Core/test/test_InputRecordWalker.cxx +++ b/Framework/Core/test/test_InputRecordWalker.cxx @@ -42,7 +42,7 @@ struct DataSet { auto payload = static_cast(this->messages[i].second.at(2 * part + 1)->data()); return 
DataRef{nullptr, header, payload}; }, - [this](size_t i) { return i < this->messages.size() ? messages[i].second.size() / 2 : 0; }, this->messages.size()}, + [this](size_t i) { return i < this->messages.size() ? messages[i].second.size() / 2 : 0; }, nullptr, this->messages.size()}, record{schema, span, registry}, values{std::move(v)} { diff --git a/Framework/Core/test/test_InputSpan.cxx b/Framework/Core/test/test_InputSpan.cxx index 0622ad898d249..c5682aea80b6c 100644 --- a/Framework/Core/test/test_InputSpan.cxx +++ b/Framework/Core/test/test_InputSpan.cxx @@ -37,7 +37,7 @@ TEST_CASE("TestInputSpan") return inputs[i].size() / 2; }; - InputSpan span{getter, nPartsGetter, inputs.size()}; + InputSpan span{getter, nPartsGetter, nullptr, inputs.size()}; REQUIRE(span.size() == inputs.size()); routeNo = 0; for (; routeNo < span.size(); ++routeNo) { diff --git a/Framework/Utils/test/RawPageTestData.h b/Framework/Utils/test/RawPageTestData.h index 684fc4d0cf8a3..a6b800f7cba32 100644 --- a/Framework/Utils/test/RawPageTestData.h +++ b/Framework/Utils/test/RawPageTestData.h @@ -47,7 +47,9 @@ struct DataSet { auto payload = static_cast(this->messages[i].at(2 * part + 1)->data()); return DataRef{nullptr, header, payload}; }, - [this](size_t i) { return i < this->messages.size() ? messages[i].size() / 2 : 0; }, this->messages.size()}, + [this](size_t i) { return i < this->messages.size() ? 
messages[i].size() / 2 : 0; }, + nullptr, + this->messages.size()}, record{schema, span, registry}, values{std::move(v)} { @@ -63,5 +65,5 @@ struct DataSet { using AmendRawDataHeader = std::function; DataSet createData(std::vector const& inputspecs, std::vector const& dataheaders, AmendRawDataHeader amendRdh = nullptr); -} // namespace o2::framework +} // namespace o2::framework::test #endif // FRAMEWORK_UTILS_RAWPAGETESTDATA_H From 7263f799ce8c3b660d0595fb95efeaef9b8c2f6c Mon Sep 17 00:00:00 2001 From: Matthias Kleiner Date: Thu, 20 Mar 2025 09:17:59 +0100 Subject: [PATCH 0113/1764] TPC: Adding check for empty IDCs --- .../include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h index 5fe0d6a442dd5..35f51dd489115 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h @@ -257,6 +257,11 @@ class TPCFourierTransformAggregatorSpec : public o2::framework::Task void makeTPCScaler(DataAllocator& output, const bool eos) { LOGP(info, "Making TPC scalers"); + if (mTPCScalerCont.idcs.empty()) { + LOGP(warning, "No IDCs received for TPC scaler creation"); + return; + } + // check if IDC scalers can be created - check length of continous received IDCs std::vector> times; times.reserve(mTPCScalerCont.idcs.size()); From 42d5ae19a64f14c858496dd77dd1f6b5520c3415 Mon Sep 17 00:00:00 2001 From: swenzel Date: Thu, 20 Mar 2025 15:07:16 +0100 Subject: [PATCH 0114/1764] remove deprecated confkey value --- Common/SimConfig/include/SimConfig/SimParams.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Common/SimConfig/include/SimConfig/SimParams.h b/Common/SimConfig/include/SimConfig/SimParams.h index 2c103f43b2b04..cf3ee2b01cf2e 100644 --- 
a/Common/SimConfig/include/SimConfig/SimParams.h +++ b/Common/SimConfig/include/SimConfig/SimParams.h @@ -36,7 +36,6 @@ struct SimCutParams : public o2::conf::ConfigurableParamHelper { float maxRTrackingZDC = 50; // R-cut applied in the tunnel leading to ZDC when z > beampipeZ (custom stepping function) float tunnelZ = 1900; // Z-value from where we apply maxRTrackingZDC (default value taken from standard "hall" dimensions) - float globalDensityFactor = 1.f; // global factor that scales all material densities for systematic studies bool lowneut = false; O2ParamDef(SimCutParams, "SimCutParams"); }; @@ -44,7 +43,7 @@ struct SimCutParams : public o2::conf::ConfigurableParamHelper { // parameter influencing material manager struct SimMaterialParams : public o2::conf::ConfigurableParamHelper { // Local density value takes precedence over global density value, i.e. local values overwrite the global value. - float globalDensityFactor = 1.f; + float globalDensityFactor = 1.f; // global factor that scales all material densities for systematic studies std::string localDensityFactor; // Expected format: "SimMaterialParams.localDensityFactor=:,:,..." 
O2ParamDef(SimMaterialParams, "SimMaterialParams"); From 97aeb5f7be62d251c196ea9c2d354b119bb8f29c Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 22 Mar 2025 02:26:50 +0100 Subject: [PATCH 0115/1764] Common: allow to write c-style arrays in TreeStream Signed-off-by: Felix Schlepper --- Common/Utils/include/CommonUtils/TreeStream.h | 162 +++++++++--------- Common/Utils/src/TreeStream.cxx | 38 +++- Common/Utils/test/testTreeStream.cxx | 42 ++++- 3 files changed, 148 insertions(+), 94 deletions(-) diff --git a/Common/Utils/include/CommonUtils/TreeStream.h b/Common/Utils/include/CommonUtils/TreeStream.h index 2aa02f6509d2c..d1d4527ffc99d 100644 --- a/Common/Utils/include/CommonUtils/TreeStream.h +++ b/Common/Utils/include/CommonUtils/TreeStream.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include "GPUCommonDef.h" class TBranch; @@ -39,10 +41,79 @@ namespace utils /// /// See testTreeStream.cxx for functional example /// +namespace details +{ +template +struct IsTrivialRootType { + static constexpr bool value = + std::is_same_v || // Float_t + std::is_same_v || // Double_t + std::is_same_v || std::is_same_v || // ULong64_t or ULong_t + std::is_same_v || std::is_same_v || // Long64_t or Long_t + std::is_same_v || // UInt_t + std::is_same_v || // Int_t + std::is_same_v || // UShort_t + std::is_same_v || // Short_t + std::is_same_v || // UChar_t + std::is_same_v || std::is_same_v || std::is_same_v; // Char_t, int8_t, or Bool_t +}; + +template +struct IsTrivialRootType { + static constexpr bool value = IsTrivialRootType::value; +}; + +template +struct IsTrivialRootType { + static constexpr bool value = IsTrivialRootType::value; +}; + +template +concept TrivialRootType = IsTrivialRootType::value; + +template +concept ComplexRootType = !IsTrivialRootType::value; + +template +static constexpr char getRootTypeCode() +{ + if constexpr (std::is_array_v) { + return getRootTypeCode>(); + } else if constexpr (std::is_same_v) { + return 'F'; + } else 
if constexpr (std::is_same_v) { + return 'D'; + } else if constexpr (std::is_same_v || + std::is_same_v) { + return 'l'; + } else if constexpr (std::is_same_v || + std::is_same_v) { + return 'L'; + } else if constexpr (std::is_same_v) { + return 'i'; + } else if constexpr (std::is_same_v) { + return 'I'; + } else if constexpr (std::is_same_v) { + return 's'; + } else if constexpr (std::is_same_v) { + return 'S'; + } else if constexpr (std::is_same_v) { + return 'b'; + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v) { + return 'B'; + } else { + static_assert(false, "unsupported type!"); + } +} +} // namespace details + class TreeStream { public: struct TreeDataElement { + int arsize = 1; ///< size of array char type = 0; ///< type of data element const TClass* cls = nullptr; ///< data type pointer const void* ptr = nullptr; ///< pointer to element @@ -64,87 +135,10 @@ class TreeStream void setID(int id) { mID = id; } int getID() const { return mID; } - TreeStream& operator<<(const Bool_t& b) - { - CheckIn('B', &b); - return *this; - } - - TreeStream& operator<<(const Char_t& c) - { - CheckIn('B', &c); - return *this; - } - - TreeStream& operator<<(const int8_t& i) - { - CheckIn('B', &i); - return *this; - } - - TreeStream& operator<<(const UChar_t& c) - { - CheckIn('b', &c); - return *this; - } - - TreeStream& operator<<(const Short_t& h) - { - CheckIn('S', &h); - return *this; - } - - TreeStream& operator<<(const UShort_t& h) - { - CheckIn('s', &h); - return *this; - } - - TreeStream& operator<<(const Int_t& i) - { - CheckIn('I', &i); - return *this; - } - - TreeStream& operator<<(const UInt_t& i) - { - CheckIn('i', &i); - return *this; - } - - TreeStream& operator<<(const Long_t& l) - { - CheckIn('L', &l); - return *this; - } - - TreeStream& operator<<(const ULong_t& l) - { - CheckIn('l', &l); - return *this; - } - - TreeStream& operator<<(const Long64_t& l) - { - CheckIn('L', &l); - return *this; - } - - TreeStream& operator<<(const 
ULong64_t& l) - { - CheckIn('l', &l); - return *this; - } - - TreeStream& operator<<(const Float_t& f) - { - CheckIn('F', &f); - return *this; - } - - TreeStream& operator<<(const Double_t& d) + template + TreeStream& operator<<(const T& t) { - CheckIn('D', &d); + CheckIn(details::getRootTypeCode(), &t); return *this; } @@ -157,7 +151,7 @@ class TreeStream return *this; } - template ::value, bool>::type* = nullptr> + template ::value, bool>::type* = nullptr> TreeStream& operator<<(const T& obj) { CheckIn(&obj); @@ -175,6 +169,7 @@ class TreeStream int mCurrentIndex = 0; ///< index of current element int mID = -1; ///< identifier of layout int mNextNameCounter = 0; ///< next name counter + int mNextArraySize = 0; ///< next array size int mStatus = 0; ///< status of the layout TString mNextName; ///< name for next entry @@ -191,8 +186,7 @@ Int_t TreeStream::CheckIn(const T* obj) } if (mCurrentIndex >= static_cast(mElements.size())) { - mElements.emplace_back(); - auto& element = mElements.back(); + auto& element = mElements.emplace_back(); element.cls = pClass; TString name = mNextName; if (name.Length()) { @@ -204,6 +198,8 @@ Int_t TreeStream::CheckIn(const T* obj) } element.name = name.Data(); element.ptr = obj; + element.arsize = mNextArraySize; + mNextArraySize = 1; // reset } else { auto& element = mElements[mCurrentIndex]; if (!element.cls) { diff --git a/Common/Utils/src/TreeStream.cxx b/Common/Utils/src/TreeStream.cxx index da25f25ad2eb1..cd0641a11d043 100644 --- a/Common/Utils/src/TreeStream.cxx +++ b/Common/Utils/src/TreeStream.cxx @@ -29,8 +29,7 @@ int TreeStream::CheckIn(Char_t type, const void* pointer) // Insert object if (mCurrentIndex >= static_cast(mElements.size())) { - mElements.emplace_back(); - auto& element = mElements.back(); + auto& element = mElements.emplace_back(); element.type = type; TString name = mNextName; if (name.Length()) { @@ -42,6 +41,8 @@ int TreeStream::CheckIn(Char_t type, const void* pointer) } element.name = name.Data(); 
element.ptr = pointer; + element.arsize = mNextArraySize; + mNextArraySize = 1; // reset } else { auto& element = mElements[mCurrentIndex]; if (element.type != type) { @@ -89,7 +90,13 @@ void TreeStream::BuildTree() } if (element.type > 0) { - TString nameC = TString::Format("%s/%c", name.Data(), element.type); + TString nameC; + if (element.arsize > 1) { + nameC = TString::Format("%s[%d]/%c", name.Data(), element.arsize, + element.type); + } else { + nameC = TString::Format("%s/%c", name.Data(), element.type); + } br = mTree.Branch(name.Data(), const_cast(element.ptr), nameC.Data()); if (entriesFilled) { br->SetAddress(nullptr); @@ -148,28 +155,43 @@ TreeStream& TreeStream::Endl() TreeStream& TreeStream::operator<<(const Char_t* name) { // Stream the branch name - // if (name[0] == '\n') { return Endl(); } - // + // if tree was already defined ignore if (mTree.GetEntries() > 0) { return *this; } + + int arsize = 1; + // check branch name if tree was not - // Int_t last = 0; for (last = 0;; last++) { if (name[last] == 0) { break; } } - if (last > 0 && name[last - 1] == '=') { mNextName = name; - mNextName[last - 1] = 0; + mNextName[last - 1] = 0; // remove '=' from string mNextNameCounter = 0; + + TString inName{name}; + auto brkStaPos = inName.Index('['); + + if (brkStaPos != kNPOS) { + auto brkEndPos = inName.Index(']'); + if (brkEndPos != kNPOS && brkEndPos > brkStaPos + 1) { + TString size = inName(brkStaPos + 1, brkEndPos - brkStaPos - 1); + arsize = size.Atoi(); + mNextName = inName(0, brkStaPos); // use parsed name + } + } } + + mNextArraySize = arsize; + return *this; } diff --git a/Common/Utils/test/testTreeStream.cxx b/Common/Utils/test/testTreeStream.cxx index 7ff6f165a1d37..2491fea7f6efd 100644 --- a/Common/Utils/test/testTreeStream.cxx +++ b/Common/Utils/test/testTreeStream.cxx @@ -53,12 +53,28 @@ BOOST_AUTO_TEST_CASE(TreeStream_test) tstStream << "TrackTreeR" << "id=" << i << "x=" << x << "track=" << trc << "\n"; } + + // test for c-arrays + int 
iArray[6] = {1, 2, 3, 4, 5, 6}; + float fArray[6] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f}; + for (int i{0}; i < nit; ++i) { + for (int j{0}; j < 6; ++j) { + iArray[j] += i; + fArray[j] += (float)i; + } + tstStream << "ArrayTree" + << "id=" << i + << "iArray[6]=" << iArray + << "fArray[6]=" << fArray + << "\n"; + } + // on destruction of tstTreem the trees will be stored, but we can also force it by tstStream.Close(); } // - LOG(info) << "Testing reading back tree maid by the TreeStream "; - // read back tracks + LOG(info) << "Testing reading back tree made by the TreeStream "; + // read back tracks and arrays { TFile inpf(outFName.data()); BOOST_CHECK(!inpf.IsZombie()); @@ -80,6 +96,27 @@ BOOST_AUTO_TEST_CASE(TreeStream_test) trc->printParam(); BOOST_CHECK(std::abs(x - trc->getX()) < 1e-4); } + + // check arrays + tree = (TTree*)inpf.GetObjectChecked("ArrayTree", "TTree"); + BOOST_CHECK(tree); + nent = tree->GetEntries(); + BOOST_CHECK(nent == nit); + int iArray[6]; + float fArray[6]; + BOOST_CHECK(!tree->SetBranchAddress("id", &id)); + BOOST_CHECK(!tree->SetBranchAddress("iArray", iArray)); + BOOST_CHECK(!tree->SetBranchAddress("fArray", fArray)); + for (int i = 0; i < nit; i++) { + BOOST_CHECK(tree->GetEntry(i) > 0); + BOOST_CHECK(id == i); + for (int j = 0; j < 6; j++) { + BOOST_CHECK(iArray[j] == (1 + j + i * (i + 1) / 2)); + } + for (int j = 0; j < 6; j++) { + BOOST_CHECK_CLOSE(fArray[j], (1.f + j + i * (i + 1) / 2.f + 0.1 * (j + 1)), 1e-5); + } + } } LOG(info) << "Testing loading tree via RootChain"; @@ -104,7 +141,6 @@ BOOST_AUTO_TEST_CASE(TreeStream_test) nit = 1000; BOOST_CHECK(UnitTestSparse(0.5, nit)); BOOST_CHECK(UnitTestSparse(0.1, nit)); - // } //_________________________________________________ From 6e8b562d7164cc2dd9892ac9a56847e98ef04cab Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sat, 22 Mar 2025 15:12:57 +0100 Subject: [PATCH 0116/1764] DPL: cleanup creation of DataProcessorInfo (#14096) - Use aggregate 
initialisation where possible - Drop unused bits Will simplify the plugins PR. --- Framework/Core/test/test_DataAllocator.cxx | 1 - .../Core/test/test_DeviceSpecHelpers.cxx | 31 +++++++++---------- .../Core/test/test_FrameworkDataFlowToDDS.cxx | 16 +++++----- .../test_FrameworkDataFlowToO2Control.cxx | 8 ++--- .../Core/test/test_WorkflowSerialization.cxx | 10 +++--- 5 files changed, 32 insertions(+), 34 deletions(-) diff --git a/Framework/Core/test/test_DataAllocator.cxx b/Framework/Core/test/test_DataAllocator.cxx index acdae51cab8e9..fefb6438b98d5 100644 --- a/Framework/Core/test/test_DataAllocator.cxx +++ b/Framework/Core/test/test_DataAllocator.cxx @@ -73,7 +73,6 @@ DataProcessorSpec getSourceSpec() { static_assert(enable_root_serialization::value, "enable_root_serialization must be true"); auto processingFct = [](ProcessingContext& pc) { - static int counter = 0; o2::test::TriviallyCopyable a(42, 23, 0xdead); o2::test::Polymorphic b(0xbeef); std::vector c{{0xaffe}, {0xd00f}}; diff --git a/Framework/Core/test/test_DeviceSpecHelpers.cxx b/Framework/Core/test/test_DeviceSpecHelpers.cxx index 6240e784d09d3..4184be848c5ec 100644 --- a/Framework/Core/test/test_DeviceSpecHelpers.cxx +++ b/Framework/Core/test/test_DeviceSpecHelpers.cxx @@ -16,7 +16,6 @@ #include "Framework/DriverConfig.h" #include "../src/DeviceSpecHelpers.h" #include -#include #include #include #include @@ -67,10 +66,10 @@ void check(const std::vector& arguments, std::vector dataProcessorInfos; for (auto& [name, _] : matrix) { dataProcessorInfos.push_back(DataProcessorInfo{ - name, - "executable-name", - arguments, - workflowOptions, + .name = name, + .executable = "executable-name", + .cmdLineArgs = arguments, + .workflowOptions = workflowOptions, }); } DriverConfig driverConfig{}; @@ -184,7 +183,7 @@ TEST_CASE("CheckOptionReworking") { { std::vector infos = { - {{}, {}, {"--driver-client-backend", "foo"}}, + {.cmdLineArgs = {"--driver-client-backend", "foo"}}, {}}; 
DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"); REQUIRE(infos[0].cmdLineArgs[1] == "foo"); @@ -192,30 +191,30 @@ TEST_CASE("CheckOptionReworking") } { std::vector infos = { - {{}, {}, {"--driver-client-backend", "foo"}}, - {{}, {}, {"--driver-client-backend", "bar"}}}; + {.cmdLineArgs = {"--driver-client-backend", "foo"}}, + {.cmdLineArgs = {"--driver-client-backend", "bar"}}}; REQUIRE_THROWS_AS(DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"), o2::framework::RuntimeErrorRef); } { std::vector infos = { - {{}, {}, {"--driver-client-backend", "foo"}}, - {{}, {}, {"--driver-client-backend", "foo"}}}; + {.cmdLineArgs = {"--driver-client-backend", "foo"}}, + {.cmdLineArgs = {"--driver-client-backend", "foo"}}}; DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"); REQUIRE(infos[0].cmdLineArgs[1] == "foo"); REQUIRE(infos[1].cmdLineArgs[1] == "foo"); } { std::vector infos = { - {{}, {}, {"foo", "bar"}}, - {{}, {}, {"fnjcnak", "foo"}}}; + {.cmdLineArgs = {"foo", "bar"}}, + {.cmdLineArgs = {"fnjcnak", "foo"}}}; DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"); REQUIRE(infos[0].cmdLineArgs[3] == "stdout://"); REQUIRE(infos[1].cmdLineArgs[3] == "stdout://"); } { std::vector infos = { - {{}, {}, {"foo", "bar", "--driver-client-backend", "bar"}}, - {{}, {}, {"fnjcnak", "foo"}}}; + {.cmdLineArgs = {"foo", "bar", "--driver-client-backend", "bar"}}, + {.cmdLineArgs = {"fnjcnak", "foo"}}}; DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"); REQUIRE(infos[0].cmdLineArgs[3] == "bar"); REQUIRE(infos[1].cmdLineArgs[3] == "bar"); @@ -277,8 +276,8 @@ TEST_CASE("CheckIntegerReworking") } { std::vector infos = { - {{}, {}, {"foo", "bar", "--readers", "3"}}, - {{}, {}, {"--readers", "2"}}}; + {.cmdLineArgs = {"foo", "bar", "--readers", "3"}}, + {.cmdLineArgs = {"--readers", "2"}}}; 
DeviceSpecHelpers::reworkIntegerOption( infos, "--readers", []() { return 1; }, 1, [](long long x, long long y) { return x > y ? x : y; }); REQUIRE(infos[0].cmdLineArgs.size() == 4); diff --git a/Framework/Core/test/test_FrameworkDataFlowToDDS.cxx b/Framework/Core/test/test_FrameworkDataFlowToDDS.cxx index 593728696e77a..dd3b2eb80d253 100644 --- a/Framework/Core/test/test_FrameworkDataFlowToDDS.cxx +++ b/Framework/Core/test/test_FrameworkDataFlowToDDS.cxx @@ -142,10 +142,10 @@ TEST_CASE("TestDDS") std::vector dataProcessorInfos = { { - {"A", "bcsadc/foo", {}, workflowOptions}, - {"B", "foo", {}, workflowOptions}, - {"C", "foo", {}, workflowOptions}, - {"D", "foo", {}, workflowOptions}, + {.name = "A", .executable = "bcsadc/foo", .workflowOptions = workflowOptions}, + {.name = "B", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "C", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "D", .executable = "foo", .workflowOptions = workflowOptions}, }}; DriverConfig driverConfig = { .batch = true, @@ -406,10 +406,10 @@ TEST_CASE("TestDDSExpendable") std::vector dataProcessorInfos = { { - {"A", "bcsadc/foo", {}, workflowOptions}, - {"B", "foo", {}, workflowOptions}, - {"C", "foo", {}, workflowOptions}, - {"D", "foo", {}, workflowOptions}, + {.name = "A", .executable = "bcsadc/foo", .workflowOptions = workflowOptions}, + {.name = "B", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "C", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "D", .executable = "foo", .workflowOptions = workflowOptions}, }}; DriverConfig driverConfig = { .batch = true, diff --git a/Framework/Core/test/test_FrameworkDataFlowToO2Control.cxx b/Framework/Core/test/test_FrameworkDataFlowToO2Control.cxx index d5f402aa16caa..9cdbc357f9674 100644 --- a/Framework/Core/test/test_FrameworkDataFlowToO2Control.cxx +++ b/Framework/Core/test/test_FrameworkDataFlowToO2Control.cxx @@ -561,10 +561,10 @@ TEST_CASE("TestO2ControlDump") 
std::vector dataProcessorInfos = { { - {"A", "bcsadc/foo", {}, workflowOptions}, - {"B", "foo", {}, workflowOptions}, - {"C", "foo", {}, workflowOptions}, - {"D", "foo", {}, workflowOptions}, + {.name = "A", .executable = "bcsadc/foo", .workflowOptions = workflowOptions}, + {.name = "B", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "C", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "D", .executable = "foo", .workflowOptions = workflowOptions}, }}; DriverConfig driverConfig{ diff --git a/Framework/Core/test/test_WorkflowSerialization.cxx b/Framework/Core/test/test_WorkflowSerialization.cxx index 6e541f7d22f07..298956970713d 100644 --- a/Framework/Core/test/test_WorkflowSerialization.cxx +++ b/Framework/Core/test/test_WorkflowSerialization.cxx @@ -52,10 +52,10 @@ TEST_CASE("TestVerifyWorkflowSerialization") {{"key1", "v\"al'1"}, {"", "val2"}, {"key3", ""}, {"", ""}}}}; std::vector dataProcessorInfoOut{ - {"A", "test_Framework_test_SerializationWorkflow", {"foo"}, {ConfigParamSpec{"aBool", VariantType::Bool, true, {"A Bool"}}}}, - {"B", "test_Framework_test_SerializationWorkflow", {"b-bar", "bfoof", "fbdbfaso"}}, - {"C", "test_Framework_test_SerializationWorkflow", {}}, - {"D", "test_Framework_test_SerializationWorkflow", {}}, + {.name = "A", .executable = "test_Framework_test_SerializationWorkflow", .cmdLineArgs = {"foo"}, .workflowOptions = {ConfigParamSpec{"aBool", VariantType::Bool, true, {"A Bool"}}}}, + {.name = "B", .executable = "test_Framework_test_SerializationWorkflow", .cmdLineArgs = {"b-bar", "bfoof", "fbdbfaso"}}, + {.name = "C", .executable = "test_Framework_test_SerializationWorkflow"}, + {.name = "D", .executable = "test_Framework_test_SerializationWorkflow"}, }; CommandInfo commandInfoOut{"o2-dpl-workflow -b --option 1 --option 2"}; @@ -94,7 +94,7 @@ TEST_CASE("TestVerifyWildcard") }}; std::vector dataProcessorInfoOut{ - {"A", "test_Framework_test_SerializationWorkflow", {}}, + {.name = "A", .executable 
= "test_Framework_test_SerializationWorkflow"}, }; CommandInfo commandInfoOut{"o2-dpl-workflow -b --option 1 --option 2"}; From f882cd3f686a6dba7bc0993aca3e936b63f8cd96 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 20 Mar 2025 18:57:56 +0100 Subject: [PATCH 0117/1764] GPU Standalone: Modernize CMake, get rid of legacy include_directories(...) and add_definitions(...) --- GPU/GPUTracking/CMakeLists.txt | 54 ++++--- .../Standalone/Benchmark/CMakeLists.txt | 2 + GPU/GPUTracking/Standalone/CMakeLists.txt | 150 ++++++++---------- GPU/TPCFastTransformation/CMakeLists.txt | 2 + 4 files changed, 98 insertions(+), 110 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 5ce96d450f765..e69e11c91d157 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -262,6 +262,27 @@ string(REPLACE ".cxx" ".h" HDRS_TMP "${SRCS_DATATYPES}") set(HDRS_CINT_DATATYPES ${HDRS_CINT_DATATYPES} ${HDRS_TMP}) unset(HDRS_TMP) +set(INCDIRS + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/Definitions + ${CMAKE_CURRENT_SOURCE_DIR}/DataTypes + ${CMAKE_CURRENT_SOURCE_DIR}/Base + ${CMAKE_CURRENT_SOURCE_DIR}/SectorTracker + ${CMAKE_CURRENT_SOURCE_DIR}/TPCConvert + ${CMAKE_CURRENT_SOURCE_DIR}/dEdx + ${CMAKE_CURRENT_SOURCE_DIR}/ITS + ${CMAKE_CURRENT_SOURCE_DIR}/TRDTracking + ${CMAKE_CURRENT_SOURCE_DIR}/qa + ${CMAKE_CURRENT_SOURCE_DIR}/Global + ${CMAKE_CURRENT_SOURCE_DIR}/HLTHeaders + ${CMAKE_CURRENT_SOURCE_DIR}/Interface + ${CMAKE_CURRENT_SOURCE_DIR}/Merger + ${CMAKE_CURRENT_SOURCE_DIR}/Refit + ${CMAKE_CURRENT_SOURCE_DIR}/Debug + ${CMAKE_CURRENT_SOURCE_DIR}/DataCompression + ${CMAKE_CURRENT_SOURCE_DIR}/TPCClusterFinder + ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) + # Main CMake part for O2 if(ALIGPU_BUILD_TYPE STREQUAL "O2") o2_add_library(GPUDataTypes @@ -297,25 +318,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::DetectorsRaw O2::Steer O2::ML - PUBLIC_INCLUDE_DIRECTORIES . 
- Definitions - DataTypes - Base - SectorTracker - TPCConvert - dEdx - ITS - TRDTracking - qa - Global - HLTHeaders - Interface - Merger - Refit - Debug - DataCompression - TPCClusterFinder - ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly + PUBLIC_INCLUDE_DIRECTORIES ${INCDIRS} SOURCES ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H}) target_include_directories( @@ -349,15 +352,18 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_library(${MODULE} SHARED ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H} ${SRCS_DATATYPES}) set(targetName ${MODULE}) add_library(O2::${MODULE} ALIAS ${MODULE}) - install(TARGETS ${MODULE}) + install(TARGETS ${targetName}) + target_link_libraries(${targetName} PUBLIC TPCFastTransformation) + target_include_directories(${targetName} PUBLIC ${INCDIRS}) if(GPUCA_CONFIG_ROOT) - ROOT_GENERATE_DICTIONARY(G__${MODULE} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_CINT_O2_ADDITIONAL} GPUTrackingLinkDef_Standalone.h) - target_sources(${MODULE} PRIVATE G__${MODULE}) + ROOT_GENERATE_DICTIONARY(G__${targetName} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_CINT_O2_ADDITIONAL} GPUTrackingLinkDef_Standalone.h MODULE ${targetName}) INSTALL(FILES - ${CMAKE_CURRENT_BINARY_DIR}/lib${MODULE}_rdict.pcm - ${CMAKE_CURRENT_BINARY_DIR}/lib${MODULE}.rootmap + ${CMAKE_CURRENT_BINARY_DIR}/lib${targetName}_rdict.pcm + ${CMAKE_CURRENT_BINARY_DIR}/lib${targetName}.rootmap DESTINATION .) 
+ else() + target_compile_definitions(${targetName} PRIVATE GPUCA_NO_ROOT) endif() endif() diff --git a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt index de2d683036193..e418d94b62cb2 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt @@ -27,6 +27,8 @@ endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_executable(ca ${SRCS}) set(targetName ca) + target_link_libraries(${targetName} PUBLIC GPUTracking) + endif() target_compile_definitions(${targetName} PUBLIC $) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 5ed511173f53e..cf5a447c26669 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -80,7 +80,6 @@ if(GPUCA_CONFIG_VC) find_package(Vc REQUIRED) else() set(Vc_FOUND OFF) - add_definitions(-DGPUCA_NO_VC) endif() if(GPUCA_BUILD_EVENT_DISPLAY) @@ -111,118 +110,97 @@ else() set(OpenGL_FOUND OFF) endif() -if(GPUCA_CONFIG_O2) - add_definitions(-DGPUCA_TPC_GEOMETRY_O2) -endif() - if(GPUCA_CONFIG_ROOT) find_package(ROOT CONFIG REQUIRED) -else() - add_definitions(-DGPUCA_NO_ROOT) endif() find_package(Microsoft.GSL REQUIRED HINTS "$ENV{MS_GSL_ROOT}/share/cmake") if(GPUCA_CONFIG_FMT) find_package(fmt REQUIRED HINTS $ENV{FMT_ROOT}) else() - add_definitions(-DGPUCA_NO_FMT) + set(fmt_FOUND OFF) endif() # Detect GPU Backends find_package(O2GPU) -# Global include directories -include_directories(${GPU_DIR}/Common - ${GPU_DIR}/Utils - ${GPU_DIR}/TPCFastTransformation - ${GPUTRACKING_DIR} - ${GPUTRACKING_DIR}/Debug - ${GPUTRACKING_DIR}/Definitions - ${GPUTRACKING_DIR}/DataTypes - ${GPUTRACKING_DIR}/Base - ${GPUTRACKING_DIR}/dEdx - ${GPUTRACKING_DIR}/TPCConvert - ${GPUTRACKING_DIR}/Global - ${GPUTRACKING_DIR}/HLTHeaders - ${GPUTRACKING_DIR}/Merger - ${GPUTRACKING_DIR}/Refit - ${GPUTRACKING_DIR}/qa - ${GPUTRACKING_DIR}/SectorTracker - 
${GPUTRACKING_DIR}/DataCompression - ${GPUTRACKING_DIR}/TRDTracking - ${GPUTRACKING_DIR}/TPCClusterFinder - ${GPUTRACKING_DIR}/ITS - ${GPUTRACKING_DIR}/Interface - ${O2_DIR}/Common/Field/include - ${O2_DIR}/Common/Constants/include - ${O2_DIR}/Common/MathUtils/include - ${O2_DIR}/Common/Utils/include - ${O2_DIR}/DataFormats/common/include - ${O2_DIR}/DataFormats/Detectors/Common/include - ${O2_DIR}/DataFormats/Detectors/ITSMFT/common/include - ${O2_DIR}/DataFormats/Detectors/ITSMFT/ITS/include - ${O2_DIR}/DataFormats/Detectors/TOF/include - ${O2_DIR}/DataFormats/Detectors/TPC/include - ${O2_DIR}/DataFormats/Detectors/TRD/include - ${O2_DIR}/DataFormats/Headers/include - ${O2_DIR}/DataFormats/MemoryResources/include - ${O2_DIR}/DataFormats/Reconstruction/include - ${O2_DIR}/DataFormats/Reconstruction/src - ${O2_DIR}/DataFormats/simulation/include - ${O2_DIR}/Detectors/Base/include - ${O2_DIR}/Detectors/Base/src - ${O2_DIR}/Detectors/ITSMFT/common/base/include - ${O2_DIR}/Detectors/ITSMFT/ITS/base/include - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/include - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU/cuda - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU/hip - ${O2_DIR}/Detectors/Raw/include - ${O2_DIR}/Detectors/TOF/base/include - ${O2_DIR}/Detectors/TPC/base/include - ${O2_DIR}/Detectors/TRD/base/include - ${O2_DIR}/Detectors/TRD/base/src - ${O2_DIR}/Framework/Foundation/3rdparty/include) - # Create main targets add_subdirectory(../../ GPU) -add_library(standalone_support SHARED ${GPUTRACKING_DIR}/utils/EmptyFile.cxx) +add_library(standalone_support SHARED ${O2_DIR}/Common/Field/src/MagFieldFast.cxx + ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx + ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx + 
${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx + ${O2_DIR}/Detectors/Base/src/Ray.cxx + ${O2_DIR}/Detectors/Base/src/Propagator.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) + target_compile_definitions(standalone_support PUBLIC $) +target_include_directories(standalone_support PUBLIC $) + +target_include_directories(standalone_support PUBLIC + ${GPU_DIR}/Common + ${GPU_DIR}/Utils + ${O2_DIR}/Common/Field/include + ${O2_DIR}/Common/Constants/include + ${O2_DIR}/Common/MathUtils/include + ${O2_DIR}/Common/Utils/include + ${O2_DIR}/DataFormats/common/include + ${O2_DIR}/DataFormats/Detectors/Common/include + ${O2_DIR}/DataFormats/Detectors/ITSMFT/common/include + ${O2_DIR}/DataFormats/Detectors/ITSMFT/ITS/include + ${O2_DIR}/DataFormats/Detectors/TOF/include + ${O2_DIR}/DataFormats/Detectors/TPC/include + ${O2_DIR}/DataFormats/Detectors/TRD/include + ${O2_DIR}/DataFormats/Headers/include + ${O2_DIR}/DataFormats/MemoryResources/include + ${O2_DIR}/DataFormats/Reconstruction/include + ${O2_DIR}/DataFormats/Reconstruction/src + ${O2_DIR}/DataFormats/simulation/include + ${O2_DIR}/Detectors/Base/include + ${O2_DIR}/Detectors/Base/src + ${O2_DIR}/Detectors/ITSMFT/common/base/include + ${O2_DIR}/Detectors/ITSMFT/ITS/base/include + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/include + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU/cuda + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU/hip + ${O2_DIR}/Detectors/Raw/include + ${O2_DIR}/Detectors/TOF/base/include + ${O2_DIR}/Detectors/TPC/base/include + ${O2_DIR}/Detectors/TRD/base/include + ${O2_DIR}/Detectors/TRD/base/src + ${O2_DIR}/Framework/Foundation/3rdparty/include) 
target_link_libraries(standalone_support PUBLIC# dl pthread - Microsoft.GSL::GSL - TPCFastTransformation) -target_link_libraries(GPUTracking PUBLIC TPCFastTransformation standalone_support) -target_link_libraries(ca PUBLIC GPUTracking) - -# Add all sources and dependencies to to support based on Config File -target_sources(standalone_support PRIVATE - ${O2_DIR}/Common/Field/src/MagFieldFast.cxx - ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx - ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx - ${O2_DIR}/Detectors/Base/src/Ray.cxx - ${O2_DIR}/Detectors/Base/src/Propagator.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) + Microsoft.GSL::GSL) + +target_link_libraries(GPUTracking PUBLIC standalone_support) +target_link_libraries(TPCFastTransformation PUBLIC standalone_support) + +if(GPUCA_CONFIG_O2) + target_compile_definitions(GPUTracking PRIVATE GPUCA_TPC_GEOMETRY_O2) +endif() if(GPUCA_CONFIG_FMT) target_link_libraries(standalone_support PUBLIC fmt::fmt) - target_link_libraries(TPCFastTransformation PUBLIC fmt::fmt) +else() + target_compile_definitions(standalone_support PUBLIC GPUCA_NO_FMT) endif() if(GPUCA_CONFIG_VC) target_link_libraries(standalone_support PUBLIC Vc::Vc) - target_link_libraries(TPCFastTransformation PUBLIC Vc::Vc) +else() + target_compile_definitions(standalone_support PUBLIC GPUCA_NO_VC) endif() if(GPUCA_CONFIG_ROOT) diff --git 
a/GPU/TPCFastTransformation/CMakeLists.txt b/GPU/TPCFastTransformation/CMakeLists.txt index 133bf35281b55..c7869467d15b5 100644 --- a/GPU/TPCFastTransformation/CMakeLists.txt +++ b/GPU/TPCFastTransformation/CMakeLists.txt @@ -119,6 +119,8 @@ endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_library(${MODULE} SHARED ${SRCS}) + set(targetName ${MODULE}) + target_include_directories(${targetName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) endif() install (FILES macro/TPCFastTransformInit.C From 4f0674122bb8903ad679dbc4e0456687857d9977 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Mar 2025 20:15:59 +0100 Subject: [PATCH 0118/1764] GPU CMake: remove unnecessary compile definitions --- GPU/Common/CMakeLists.txt | 2 -- GPU/Utils/CMakeLists.txt | 2 -- 2 files changed, 4 deletions(-) diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index bacf4454c39fd..efac4fc436758 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -37,8 +37,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") PUBLIC $ $) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) - # cuda test, only compile if CUDA if(CUDA_ENABLED) o2_add_test(GPUsortCUDA NAME test_GPUsortCUDA diff --git a/GPU/Utils/CMakeLists.txt b/GPU/Utils/CMakeLists.txt index 01ca3eb59c029..bf23c792c2034 100644 --- a/GPU/Utils/CMakeLists.txt +++ b/GPU/Utils/CMakeLists.txt @@ -31,7 +31,5 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") HEADERS ${HDRS_CINT} LINKDEF GPUUtilsLinkDef.h) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) - install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION include/GPU) endif() From 66174ae4805cb78c453c771dd1b24221380c2093 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Mar 2025 20:31:19 +0100 Subject: [PATCH 0119/1764] GPU Standalone: Fix compilation without FMT --- GPU/GPUTracking/Definitions/GPULogging.h | 2 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 2 
deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPULogging.h b/GPU/GPUTracking/Definitions/GPULogging.h index 4ad6b70b2fd8b..c8ba635d1af7a 100644 --- a/GPU/GPUTracking/Definitions/GPULogging.h +++ b/GPU/GPUTracking/Definitions/GPULogging.h @@ -69,7 +69,7 @@ #define GPUCritical(...) GPUWarning(__VA_ARGS__) #define GPUFatal(string, ...) \ { \ - fprintf(stderr, string "\n", __VA_ARGS__); \ + fprintf(stderr, string "\n", ##__VA_ARGS__); \ exit(1); \ } #endif diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index cf5a447c26669..5a39f082f2bbd 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -128,7 +128,6 @@ find_package(O2GPU) add_subdirectory(../../ GPU) add_library(standalone_support SHARED ${O2_DIR}/Common/Field/src/MagFieldFast.cxx ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx - ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx From 0c4029015067d8f381ec76d1d5dd612556f8b7a5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Mar 2025 20:33:14 +0100 Subject: [PATCH 0120/1764] GPU: Fix incorrect printf syntax --- GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx | 2 +- GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index a48050a6cacbc..4047dcae0a6b3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -124,7 +124,7 @@ void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfF } if (pagesEndpoint != nPagesExpected) { - GPUFatal("Sector %d, Endpoint 
%d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %lu", iSector, endpoint, fragment.index, pagesEndpoint, nPagesExpected); + GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %u", iSector, endpoint, fragment.index, pagesEndpoint, nPagesExpected); } if (nAdcDecoded != nAdcsExpected) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 38e1cd0036c16..c42d9622f5332 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -258,13 +258,13 @@ void GPUChainTracking::SanityCheck() const auto& ref = trk.getClusterRef(); if (ref.getFirstEntry() > mIOPtrs.nOutputClusRefsTPCO2) { if (nErrors++ < 1000) { - GPUError("Invalid getFirst() entry in cluster reference: %u > $u", ref.getFirstEntry(), mIOPtrs.nOutputClusRefsTPCO2); + GPUError("Invalid getFirst() entry in cluster reference: %u > %u", ref.getFirstEntry(), mIOPtrs.nOutputClusRefsTPCO2); continue; } } if (ref.getFirstEntry() + (ref.getEntries() * 3 + 1) / 2 > mIOPtrs.nOutputClusRefsTPCO2) { if (nErrors++ < 1000) { - GPUError("Invalid getEntries() entry in cluster reference: %u > $u", ref.getFirstEntry() + (ref.getEntries() * 3 + 1) / 2, mIOPtrs.nOutputClusRefsTPCO2); + GPUError("Invalid getEntries() entry in cluster reference: %u > %u", ref.getFirstEntry() + (ref.getEntries() * 3 + 1) / 2, mIOPtrs.nOutputClusRefsTPCO2); continue; } } From 8df4ac28e698bd5294e442367a89e9d71bd0b48a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Mar 2025 20:41:33 +0100 Subject: [PATCH 0121/1764] GPU Standalone: Add CMake check to require ROOT if building event display --- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 5a39f082f2bbd..1f48b4fc5ade1 100644 --- 
a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -112,6 +112,8 @@ endif() if(GPUCA_CONFIG_ROOT) find_package(ROOT CONFIG REQUIRED) +elseif(GPUCA_BUILD_EVENT_DISPLAY) + message(FATAL_ERROR "Cannot build event display without ROOT") endif() find_package(Microsoft.GSL REQUIRED HINTS "$ENV{MS_GSL_ROOT}/share/cmake") From ee497d7867e8f9893dfa6437f2ebf483c7a3687c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 23 Mar 2025 19:30:12 +0100 Subject: [PATCH 0122/1764] GPU CMake: Shuffle stuff a bit and move all NO_FAST_MATH stuff to GPU/... and FindO2GPU.cmake --- GPU/CMakeLists.txt | 1 - dependencies/FindO2GPU.cmake | 26 +++++++++++++++----------- dependencies/O2CompileFlags.cmake | 7 ------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 3c83c583eebfc..75cd5eddc0b24 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -16,7 +16,6 @@ if(NOT DEFINED GPUCA_NO_FAST_MATH) set(GPUCA_NO_FAST_MATH 0) endif() -set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") if(${GPUCA_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") endif() diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index f8d41c032078f..69241ea30a375 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -64,9 +64,20 @@ function(set_target_hip_arch target) endif() endfunction() -# Detect and enable CUDA -STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") # Need to strip c++17 imposed by alidist defaults +# Need to strip c++17 imposed by alidist defaults +STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") +# ---------------------------------- Fast Math / Deterministic Mode ---------------------------------- +if(GPUCA_NO_FAST_MATH_WHOLEO2) + set(GPUCA_NO_FAST_MATH 1) + add_definitions(-DGPUCA_NO_FAST_MATH) + 
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") + set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") +endif() +set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") +set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false") + +# ---------------------------------- CUDA ---------------------------------- if(ENABLE_CUDA) set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) @@ -124,7 +135,6 @@ if(ENABLE_CUDA) else() set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4") endif() - set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false") if(DEFINED GPUCA_NO_FAST_MATH AND "${GPUCA_NO_FAST_MATH}") set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") @@ -146,7 +156,7 @@ if(ENABLE_CUDA) endif() endif() -# Detect and enable OpenCL 1.2 from AMD +# ---------------------------------- OpenCL ---------------------------------- if(ENABLE_OPENCL) find_package(OpenCL) if(ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO") @@ -154,11 +164,6 @@ if(ENABLE_OPENCL) else() set_package_properties(OpenCL PROPERTIES TYPE OPTIONAL) endif() -endif() - -# Detect and enable OpenCL 2.x -if(ENABLE_OPENCL) - find_package(OpenCL) find_package(LLVM) if(LLVM_FOUND) find_package(Clang) @@ -196,7 +201,7 @@ if(ENABLE_OPENCL) endif() endif() -# Detect and enable HIP +# ---------------------------------- HIP ---------------------------------- if(ENABLE_HIP) if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/") list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake") @@ -303,7 +308,6
@@ if(ENABLE_HIP) endif() message(FATAL_ERROR "HIP requested but some of the above packages are not found") endif() - endif() # if we end up here without a FATAL, it means we have found the "O2GPU" package diff --git a/dependencies/O2CompileFlags.cmake b/dependencies/O2CompileFlags.cmake index 08dd388cbdf36..eeddc189e8897 100644 --- a/dependencies/O2CompileFlags.cmake +++ b/dependencies/O2CompileFlags.cmake @@ -138,11 +138,4 @@ if(DEFINED ENV{O2_CXXFLAGS_OVERRIDE}) message(STATUS "Setting CXXFLAGS Override $ENV{O2_CXXFLAGS_OVERRIDE}") endif() -if(GPUCA_NO_FAST_MATH_WHOLEO2) - set(GPUCA_NO_FAST_MATH 1) - add_definitions(-DGPUCA_NO_FAST_MATH) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") - set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") -endif() - message(STATUS "Using build type: ${CMAKE_BUILD_TYPE} - CXXFLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") From e836e3fd219c12a40b306065d893680585d5aebf Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 23 Mar 2025 20:56:42 +0100 Subject: [PATCH 0123/1764] GPU: Replace GPUCA_NO_FAST_MATH by more fine-grain GPUCA_DETERMINISTIC_MODE --- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 4 +-- GPU/CMakeLists.txt | 7 ----- GPU/Common/GPUCommonMath.h | 6 ++-- GPU/GPUTracking/Base/GPUReconstruction.cxx | 4 +-- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 4 +-- GPU/GPUTracking/CMakeLists.txt | 15 ++++++---- .../Definitions/GPUDefGPUParameters.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 24 +++++++-------- GPU/GPUTracking/Standalone/CMakeLists.txt | 10 +------ GPU/GPUTracking/Standalone/cmake/config.cmake | 16 +++++----- dependencies/FindO2GPU.cmake | 30 ++++++++++++++----- 13 files changed, 66 insertions(+), 60 deletions(-) diff --git 
a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index ce93523319e99..a66eba7c3bacb 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -43,7 +43,7 @@ #define THRUST_NAMESPACE thrust::hip #endif -#ifdef GPUCA_NO_FAST_MATH +#ifdef GPUCA_DETERMINISTIC_MODE #define GPU_BLOCKS 1 #define GPU_THREADS 1 #else @@ -1452,4 +1452,4 @@ template void processNeighboursHandler<7>(const int startLayer, const o2::base::PropagatorF::MatCorrType matCorrType, const int nBlocks, const int nThreads); -} // namespace o2::its \ No newline at end of file +} // namespace o2::its diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 75cd5eddc0b24..7e2b797ae714f 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -13,13 +13,6 @@ # CMake, variables are defined for Sources / Headers first. Then, the actual # CMake build scripts use these variables. -if(NOT DEFINED GPUCA_NO_FAST_MATH) - set(GPUCA_NO_FAST_MATH 0) -endif() -if(${GPUCA_NO_FAST_MATH}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") -endif() - add_subdirectory(Common) add_subdirectory(Utils) add_subdirectory(TPCFastTransformation) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index c412662fc0c64..b7a44c1df0f38 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -250,7 +250,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) #endif } -#ifdef GPUCA_NO_FAST_MATH +#ifdef GPUCA_DETERMINISTIC_MODE GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } @@ -286,7 +286,7 @@ GPUdi() constexpr bool 
GPUCommonMath::IsNaN(float x) { return false; } GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) { -#if defined(GPUCA_NO_FAST_MATH) && !defined(__OPENCL__) +#if defined(GPUCA_DETERMINISTIC_MODE) && !defined(__OPENCL__) s = sin((double)x); c = cos((double)x); #elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__) @@ -392,7 +392,7 @@ GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, GPUdi() float GPUCommonMath::InvSqrt(float _x) { -#if defined(GPUCA_NO_FAST_MATH) || defined(__OPENCL__) +#if defined(GPUCA_DETERMINISTIC_MODE) || defined(__OPENCL__) return 1.f / Sqrt(_x); #elif defined(__CUDACC__) || defined(__HIPCC__) return __frsqrt_rn(_x); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 8bae1df267412..5582084fd0e17 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -261,8 +261,8 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6; } if (mProcessingSettings.deterministicGPUReconstruction) { -#ifndef GPUCA_NO_FAST_MATH - GPUError("Warning, deterministicGPUReconstruction needs GPUCA_NO_FAST_MATH for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); +#ifndef GPUCA_DETERMINISTIC_MODE + GPUError("Warning, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 5b2e53179e50c..f8203c2dc5858 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -172,7 
+172,7 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel") TARGET_DIRECTORY ${targetName} PROPERTIES COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH") + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc") message(FATAL_ERROR "CUDA RDC compilation of GPUReconstruction ios not yet working!") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 21a641c0cc7c0..fee43eb6d8b0d 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -230,7 +230,7 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel") TARGET_DIRECTORY ${targetName} PROPERTIES COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH") + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "rdc") message(FATAL_ERROR "HIP RDC compilation of GPUReconstruction ios not yet working!") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 89d2f386f768f..3da5b77f80d86 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -24,10 +24,10 @@ set(CL_SRC ${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl) set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode) set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021) -if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH}) +if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() -set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt) + set(OCL_FLAGS ${OCL_FLAGS} 
-cl-fp32-correctly-rounded-divide-sqrt) endif() set(OCL_DEFINECL "-D$,$-D>" "-I$,EXCLUDE,^/usr/include/?>,$-I>" diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index e69e11c91d157..dedfcf5953394 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -14,8 +14,13 @@ set(MODULE GPUTracking) # set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0") # to uncomment if needed, tired of typing this... # set(GPUCA_BUILD_DEBUG 1) -if(NOT "${GPUCA_NO_FAST_MATH}" AND NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") +if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") + endif() +elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() include(cmake/helpers.cmake) @@ -375,7 +380,7 @@ set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx TARGET_DIRECTORY ${targetName} PROPERTIES COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH") + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") # GPUReconstructionLibrary needs to know which GPU backends are enabled for proper error messages configure_file(Base/GPUReconstructionAvailableBackends.template.h ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionAvailableBackends.h) @@ -417,6 +422,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR ALIGPU_BUILD_TYPE STREQUAL "Standalone") endif() endif() -if(${GPUCA_NO_FAST_MATH}) - target_compile_definitions(${targetName} PUBLIC GPUCA_NO_FAST_MATH) 
+if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU}) + target_compile_definitions(${targetName} PUBLIC GPUCA_DETERMINISTIC_MODE) endif() diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 55f2e76344bd5..6d6645850408f 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -566,7 +566,7 @@ #ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float #endif -#ifdef GPUCA_NO_FAST_MATH +#ifdef GPUCA_DETERMINISTIC_MODE #undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float #undef GPUCA_DEDX_STORAGE_TYPE diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index eb1df3f37b6b5..5ede29d7fd851 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -723,13 +723,13 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea if (iThread == 0) { if (iBlock == 0) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); }); #else GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; }); #endif } else if (iBlock == 1) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMax != b.fMax) ? 
(a.fMax < b.fMax) : (a.fId < b.fId); }); #else GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; }); @@ -749,7 +749,7 @@ namespace // anonymous struct MergeBorderTracks_compMax { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); #else return a.fMax < b.fMax; @@ -759,7 +759,7 @@ struct MergeBorderTracks_compMax { struct MergeBorderTracks_compMin { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); #else return a.fMin < b.fMin; @@ -906,7 +906,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea mTrackLinks[b1.TrackID()] = iBest2; if (mergeMode > 0) { -#if defined(GPUCA_NO_FAST_MATH) // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()); #else mTrackLinks[iBest2] = b1.TrackID(); @@ -1469,7 +1469,7 @@ struct GPUTPCGMMerger_CompareClusterIdsLooper { if (a1.row != b1.row) { return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards); } -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a1.id != b1.id) { return (a1.id > b1.id); } @@ -1490,7 +1490,7 @@ struct GPUTPCGMMerger_CompareClusterIds { if (a.row != b.row) { return (a.row > b.row); } -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a.id != b.id) { return (a.id > b.id); } @@ -1569,7 +1569,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread // unpack and sort clusters if (nParts > 1 && leg == 0) { 
GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a->X() != b->X()) { return (a->X() > b->X()); } @@ -1834,7 +1834,7 @@ struct GPUTPCGMMergerSortTracks_comp { if (a.Legs() != b.Legs()) { return a.Legs() > b.Legs(); } -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a.NClusters() != b.NClusters()) { return a.NClusters() > b.NClusters(); } @@ -1858,7 +1858,7 @@ struct GPUTPCGMMergerSortTracksQPt_comp { { const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); } @@ -1907,7 +1907,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ if (a.Legs() != b.Legs()) { return a.Legs() > b.Legs(); } -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a.NClusters() != b.NClusters()) { return a.NClusters() > b.NClusters(); } @@ -1937,7 +1937,7 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); } diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 
1f48b4fc5ade1..ed4fc5c9f7e2d 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -52,15 +52,7 @@ if(GPUCA_BUILD_DEBUG) set(CMAKE_CXX_FLAGS "-O0 -ggdb") set(CMAKE_BUILD_TYPE DEBUG) else() - set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb -minline-all-stringops -funroll-loops -fno-stack-protector") - if(DEFINED GPUCA_NO_FAST_MATH AND ${GPUCA_NO_FAST_MATH}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math -ffp-contract=off") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") - endif() - if (NOT CMAKE_CXX_COMPILER STREQUAL "clang++") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftracer -fprefetch-loop-arrays") - endif() + set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb") set(CMAKE_BUILD_TYPE RELEASE) add_definitions(-DNDEBUG) endif() diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 87716d700abc8..af7c96bb96fbb 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -27,13 +27,13 @@ set(GPUCA_CONFIG_GL3W 0) set(GPUCA_CONFIG_O2 1) set(GPUCA_BUILD_DEBUG 0) set(GPUCA_BUILD_DEBUG_SANITIZE 0) -set(GPUCA_NO_FAST_MATH 0) -#set(GPUCA_CUDA_GCCBIN c++-13) -#set(GPUCA_OPENCL_CLANGBIN clang-18) -#set(HIP_AMDGPUTARGET "gfx906;gfx908;gfx90a") -set(HIP_AMDGPUTARGET "default") -#set(CUDA_COMPUTETARGET 86 89) -set(CUDA_COMPUTETARGET "default") -#set(GPUCA_CUDA_COMPILE_MODE perkernel) +set(GPUCA_DETERMINISTIC_MODE 0) # OFF / NO_FAST_MATH / OPTO2 / GPU / WHOLEO2 +#set(GPUCA_CUDA_GCCBIN c++-14) +#set(GPUCA_OPENCL_CLANGBIN clang-19) +set(HIP_AMDGPUTARGET "default") # "gfx906;gfx908;gfx90a" +set(CUDA_COMPUTETARGET "default") # 86 89 +#set(GPUCA_CUDA_COMPILE_MODE perkernel) # onefile / perkernel / rtc #set(GPUCA_HIP_COMPILE_MODE perkernel) #set(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE 1) +#set(GPUCA_CONFIG_COMPILER gcc) # gcc / clang +#add_definitions(-DGPUCA_GPU_DEBUG_PRINT) diff --git 
a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 69241ea30a375..650a269209d9b 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -68,14 +68,30 @@ endfunction() STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") # ---------------------------------- Fast Math / Deterministic Mode ---------------------------------- -if(GPUCA_NO_FAST_MATH_WHOLEO2) - set(GPUCA_NO_FAST_MATH 1) - add_definitions(-DGPUCA_NO_FAST_MATH) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") - set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") +# set(GPUCA_DETERMINISTIC_MODE WHOLEO2) # Override +set(GPUCA_DETERMINISTIC_MODE_MAP_OFF 0) +set(GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH 1) # No -ffast-math and similar compile flags for GPU folder +set(GPUCA_DETERMINISTIC_MODE_MAP_OPTO2 2) # In addition, -O2 optimization on host for GPU folder +set(GPUCA_DETERMINISTIC_MODE_MAP_GPU 3) # In addition, GPUCA_DETERMINISTIC_MODE define for GPU folder +set(GPUCA_DETERMINISTIC_MODE_MAP_ON 3) # Synonym for GPU +set(GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2 4) # As GPU but for whole O2 code +if(NOT DEFINED GPUCA_DETERMINISTIC_MODE) + set(GPUCA_DETERMINISTIC_MODE 0) +elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$") + if(NOT DEFINED GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}) + message(FATAL_ERROR "Invalid setting ${GPUCA_DETERMINISTIC_MODE} for GPUCA_DETERMINISTIC_MODE") + endif() + set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}}) + message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}") endif() set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false") +if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL 
${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2}) + add_definitions(-DGPUCA_DETERMINISTIC_MODE) + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") +endif() + # ---------------------------------- CUDA ---------------------------------- if(ENABLE_CUDA) @@ -135,7 +151,7 @@ if(ENABLE_CUDA) else() set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4") endif() - if(DEFINED GPUCA_NO_FAST_MATH AND "${GPUCA_NO_FAST_MATH}") + if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -use_fast_math --ftz=true")# @@ -274,7 +290,7 @@ if(ENABLE_HIP) if(HIP_AMDGPUTARGET) set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection endif() - if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH}) + if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -fgpu-flush-denormals-to-zero -ffast-math") endif() set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS}") From b6ad4bc54e104ec8842eb77cfed4af2edbda6d85 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 13:26:00 +0100 Subject: [PATCH 0124/1764] DPL: Print error in case we receive data with bogus runNumber / tfCounter --- Framework/Core/src/ExternalFairMQDeviceProxy.cxx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx 
b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx index 823ef8f5fd5a0..52e0413d48dab 100644 --- a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx +++ b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx @@ -493,6 +493,12 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL } } + int fmqRunNumber = -1; + try { + fmqRunNumber = atoi(device->fConfig->GetProperty("runNumber", "").c_str()); + } catch (...) { + } + for (int msgidx = 0; msgidx < parts.Size(); msgidx += 2) { if (parts.At(msgidx).get() == nullptr) { LOG(error) << "unexpected nullptr found. Skipping message pair."; @@ -521,6 +527,9 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL timingInfo.runNumber = dh->runNumber; timingInfo.tfCounter = dh->tfCounter; LOG(debug) << msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); + if (dh->runNumber == 0 || dh->tfCounter == 0 || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { + LOG(error) << "INVALID runNumber / tfCounter: runNumber " << dh->runNumber << ", tfCounter " << dh->tfCounter << ", FMQ runNumber " << fmqRunNumber; + } OutputSpec query{dh->dataOrigin, dh->dataDescription, dh->subSpecification}; LOG(debug) << "processing " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " time slice " << dph->startTime << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts; From b83452a140d60b615fe88ee156f6311abe7aa72d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 13:26:34 +0100 Subject: [PATCH 0125/1764] GPU: Can insert marker earlier, no need to wait --- GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx 
b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 6c79d87e50465..36a947dda9dc3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -32,8 +32,8 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto } uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS; if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) { - TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); RecordMarker(&mEvents->single, 0); + TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); for (uint32_t i = 0; i < n; i++) { int32_t stream = i % mRec->NStreams(); runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSector, mergeMode); From fe2f93e7e42de2fcfbefd55a7e4246e85bd6813c Mon Sep 17 00:00:00 2001 From: shahoian Date: Mon, 24 Mar 2025 18:26:21 +0100 Subject: [PATCH 0126/1764] Fix method MCTrackInfo::getNITSClusCont --- .../GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx index 92107d90b48ed..204e0c741a675 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx @@ -25,7 +25,9 @@ int MCTrackInfo::getNITSClusCont() const int longest = 0, current = 0; for (int i = 0; i < 7; i++) { if (pattITSCl & (0x1 << i)) { - longest = ++current; + if (++current > longest) { + longest = current; + } } else { current = 0; } From e7d0f6ad02eb19bc6dab981b65bfa838234c865f Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 24 Mar 2025 17:40:10 +0100 Subject: [PATCH 0127/1764] ALICE3: Fix 
TRKConfigParam dictionary creation --- Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt | 3 ++- .../ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt index 856fd310fe5a2..a1cb0279efef8 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt @@ -30,4 +30,5 @@ o2_target_root_dictionary(TRKSimulation include/TRKSimulation/TRKServices.h include/TRKSimulation/TRKPetalCase.h include/TRKSimulation/TRKPetalLayer.h - include/TRKSimulation/TRKPetalDisk.h) \ No newline at end of file + include/TRKSimulation/TRKPetalDisk.h + include/TRKSimulation/DPLDigitizerParam.h) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h index 876810b5bef9d..d80027593cef0 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h @@ -24,9 +24,9 @@ #pragma link C++ class o2::base::DetImpl < o2::trk::Detector> + ; #pragma link C++ class o2::trk::Digitizer + ; -// #pragma link C++ class o2::itsmft::DPLDigitizerParam < o2::detectors::DetID::ITS> + ; -// #pragma link C++ class o2::itsmft::DPLDigitizerParam < o2::detectors::DetID::ITS> + ; -// #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::TRK>> + ; -// #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::FT3>> + ; +#pragma link C++ class o2::trk::DPLDigitizerParam < o2::detectors::DetID::TRK> + ; +#pragma link C++ class o2::trk::DPLDigitizerParam < o2::detectors::DetID::FT3> + ; +#pragma link C++ class o2::conf::ConfigurableParamHelper < 
o2::trk::DPLDigitizerParam < o2::detectors::DetID::TRK>> + ; +#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::FT3>> + ; #endif From a31999e1aca0bd85c649a4995011a7317013a227 Mon Sep 17 00:00:00 2001 From: shahoian Date: Mon, 24 Mar 2025 15:48:44 +0100 Subject: [PATCH 0128/1764] Make ITS reco start layer configurable --- .../ITS/tracking/include/ITStracking/TrackingConfigParam.h | 1 + Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index ec96321765534..cb6512248bf0c 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -62,6 +62,7 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper0, otherwise use code defaults + uint8_t startLayerMask[MaxIter] = {}; // mask of start layer for this iteration (if >0) float minPtIterLgt[MaxIter * (MaxTrackLength - MinTrackLength + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults float sysErrY2[7] = {0}; // systematic error^2 in Y per layer float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 2c94c9bdb1f46..613402ce56e97 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -78,6 +78,9 @@ void ITSTrackingInterface::initialise() param.TrackletsPerClusterLimit = 1.e3f; // check if something was overridden via configurable params if (ip < trackConf.MaxIter) { + if (trackConf.startLayerMask[ip] > 0) { + trackParams[2].StartLayerMask = trackConf.startLayerMask[ip]; + } if 
(trackConf.minTrackLgtIter[ip] > 0) { param.MinTrackLength = trackConf.minTrackLgtIter[ip]; } From c2aa2f542ded67b6f2c3bde53111241675e90501 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 13:57:07 +0100 Subject: [PATCH 0129/1764] GPU: Remove hack to disable synchronization in thrust::sort, which was not working any more --- GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h | 24 ++----------------- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 +++---- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 3 files changed, 7 insertions(+), 27 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h index fdc5c16d91f35..676610b5e4c52 100644 --- a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h +++ b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h @@ -22,12 +22,12 @@ namespace o2::gpu { -class ThrustVolatileAsyncAllocator +class ThrustVolatileAllocator { public: typedef char value_type; - ThrustVolatileAsyncAllocator(GPUReconstruction* r) : mRec(r) {} + ThrustVolatileAllocator(GPUReconstruction* r) : mRec(r) {} char* allocate(std::ptrdiff_t n) { return (char*)mRec->AllocateVolatileDeviceMemory(n); } void deallocate(char* ptr, size_t) {} @@ -38,24 +38,4 @@ class ThrustVolatileAsyncAllocator } // namespace o2::gpu -#ifndef __HIPCC__ -// Override synchronize call at end of thrust algorithm running on stream, just don't run cudaStreamSynchronize -namespace thrust::cuda_cub -{ - -typedef thrust::cuda_cub::execution_policy thrustStreamPolicy; -template <> -__host__ __device__ inline cudaError_t synchronize(thrustStreamPolicy& policy) -{ -#ifndef GPUCA_GPUCODE_DEVICE - // Do not synchronize! 
- return cudaSuccess; -#else - return synchronize_stream(derived_cast(policy)); -#endif -} - -} // namespace thrust::cuda_cub -#endif // __HIPCC__ - #endif // GPU_CUDATHRUSTHELPERS_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 5ede29d7fd851..92ecd380fa38f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -773,7 +773,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { thrust::device_ptr p(range); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); if (cmpMax) { thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax()); } else { @@ -1878,7 +1878,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } @@ -1886,7 +1886,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), 
GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt @@ -2111,7 +2111,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 13f204d0f940a..c366133bbae21 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -105,7 +105,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread From d9b654e5df67ecd8b5a81aa3625f3c9bb6164261 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 14:28:46 +0100 Subject: [PATCH 0130/1764] GPU: Add getThrustVolatileDeviceAllocator function --- GPU/Common/GPUCommonHelpers.h | 19 +++++++++ GPU/GPUTracking/Base/GPUReconstruction.cxx | 10 +++++ GPU/GPUTracking/Base/GPUReconstruction.h | 2 + 
GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h | 41 ------------------- .../Base/cuda/GPUReconstructionCUDA.cu | 1 - .../GPUReconstructionCUDAExternalProvider.cu | 1 - .../cuda/GPUReconstructionCUDAHelpers.inc | 7 ++++ .../Base/cuda/GPUReconstructionCUDAKernels.cu | 1 - GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 ++-- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 3 +- 12 files changed, 47 insertions(+), 52 deletions(-) delete mode 100644 GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h diff --git a/GPU/Common/GPUCommonHelpers.h b/GPU/Common/GPUCommonHelpers.h index 915d93c9bc791..2927ddab6bd0c 100644 --- a/GPU/Common/GPUCommonHelpers.h +++ b/GPU/Common/GPUCommonHelpers.h @@ -35,6 +35,7 @@ #include "GPUCommonDef.h" #include "GPUCommonLogger.h" #include +#include namespace o2::gpu::internal { @@ -60,4 +61,22 @@ static inline int32_t GPUReconstructionChkErr(const int64_t error, const char* f #undef GPUCOMMON_INTERNAL_CAT } // namespace o2::gpu::internal +namespace o2::gpu +{ +class GPUReconstruction; +class ThrustVolatileAllocator +{ + public: + typedef char value_type; + + char* allocate(std::ptrdiff_t n); + void deallocate(char* ptr, size_t); + + private: + ThrustVolatileAllocator(GPUReconstruction* r); + std::function mAlloc; + friend class GPUReconstruction; +}; +} // namespace o2::gpu + #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 5582084fd0e17..d96d5aad74622 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -30,6 +30,7 @@ #include "GPUROOTDumpCore.h" #include "GPUConfigDump.h" #include "GPUChainTracking.h" +#include "GPUCommonHelpers.h" #include "GPUMemoryResource.h" #include "GPUChain.h" @@ -1193,3 +1194,12 @@ void GPUReconstruction::SetInputControl(void* ptr, size_t size) { mInputControl.set(ptr, size); } + 
+ThrustVolatileAllocator::ThrustVolatileAllocator(GPUReconstruction* r) +{ + mAlloc = [&r](size_t n) { return (char*)r->AllocateVolatileDeviceMemory(n); }; +} +ThrustVolatileAllocator GPUReconstruction::getThrustVolatileDeviceAllocator() +{ + return ThrustVolatileAllocator(this); +} diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 5e03c77f08230..18098396e1349 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -47,6 +47,7 @@ struct GPUMemorySizeScalers; struct GPUReconstructionPipelineContext; struct GPUReconstructionThreading; class GPUROOTDumpCore; +class ThrustVolatileAllocator; namespace gpu_reconstruction_kernels { @@ -165,6 +166,7 @@ class GPUReconstruction void ClearAllocatedMemory(bool clearOutputs = true); void ReturnVolatileDeviceMemory(); void ReturnVolatileMemory(); + ThrustVolatileAllocator getThrustVolatileDeviceAllocator(); void PushNonPersistentMemory(uint64_t tag); void PopNonPersistentMemory(RecoStep step, uint64_t tag); void BlockStackedMemory(GPUReconstruction* rec); diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index f8203c2dc5858..99c59afd2011a 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -18,7 +18,7 @@ endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) -set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) +set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h) # -------------------------------- Prepare RTC ------------------------------------------------------- 
enable_language(ASM) if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h deleted file mode 100644 index 676610b5e4c52..0000000000000 --- a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file CUDAThrustHelpers.h -/// \author David Rohr - -#ifndef GPU_CUDATHRUSTHELPERS_H -#define GPU_CUDATHRUSTHELPERS_H - -#include "GPULogging.h" -#include -#include - -namespace o2::gpu -{ - -class ThrustVolatileAllocator -{ - public: - typedef char value_type; - - ThrustVolatileAllocator(GPUReconstruction* r) : mRec(r) {} - char* allocate(std::ptrdiff_t n) { return (char*)mRec->AllocateVolatileDeviceMemory(n); } - - void deallocate(char* ptr, size_t) {} - - private: - GPUReconstruction* mRec; -}; - -} // namespace o2::gpu - -#endif // GPU_CUDATHRUSTHELPERS_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 202edd49bc44c..175fd205153ea 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -19,7 +19,6 @@ #include "GPUReconstructionCUDA.h" #include "GPUReconstructionCUDAInternals.h" -#include "CUDAThrustHelpers.h" #include "GPUReconstructionIncludes.h" #include "GPUParamRTC.h" #include "GPUReconstructionCUDAHelpers.inc" diff --git 
a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index f341a778076b8..534f5e8606897 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -16,7 +16,6 @@ #include "GPUReconstructionCUDA.h" #include "GPUReconstructionCUDAInternals.h" -#include "CUDAThrustHelpers.h" #include diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc index a34f940a1337a..c2b6f6d05dd7f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc @@ -16,6 +16,7 @@ #define GPURECONSTRUCTIONCUDAHELPERS_INC_H #include "GPUCommonHelpers.h" +#include "GPUReconstruction.h" namespace o2::gpu::internal { @@ -28,4 +29,10 @@ int32_t __attribute__((weak)) GPUReconstructionCUDAChkErr(const int64_t error, c } } // namespace o2::gpu::internal +namespace o2::gpu +{ +char* __attribute__((weak)) ThrustVolatileAllocator::allocate(std::ptrdiff_t n) { return mAlloc(n); } +void __attribute__((weak)) ThrustVolatileAllocator::deallocate(char* ptr, size_t) {} +} // namespace o2::gpu + #endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 0c83223ba238a..f1f459fe021bc 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -16,7 +16,6 @@ #include "GPUReconstructionCUDA.h" #include "GPUReconstructionCUDAInternals.h" -#include "CUDAThrustHelpers.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index fee43eb6d8b0d..16e6e72d56e9a 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 
@@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesHost.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) @@ -63,7 +63,7 @@ endif() set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) -set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) +set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h) # -------------------------------- Prepare RTC ------------------------------------------------------- 
enable_language(ASM) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 92ecd380fa38f..c57ca9e5e1436 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -773,7 +773,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { thrust::device_ptr p(range); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); if (cmpMax) { thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax()); } else { @@ -1878,7 +1878,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } @@ -1886,7 +1886,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - 
Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt @@ -2111,7 +2111,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index c366133bbae21..9594391163586 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -105,7 +105,8 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); + ; thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread From ac0408b22dc8acdd41a64308de4a24b2a32b6264 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 15:14:02 +0100 Subject: [PATCH 0131/1764] GPU: Add GPUCommonAlgorithm::sortOnDevice function for starting sort on device from host --- GPU/Common/GPUCommonAlgorithm.h | 4 ++++ GPU/Common/GPUCommonAlgorithmThrust.h | 8 ++++++++ .../Base/cuda/GPUReconstructionCUDA.h | 2 ++ .../Global/GPUChainTrackingMerger.cxx | 2 
+- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 18 +++++------------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 5 +---- 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index a733f0ff99f26..b82c0059b9e15 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -43,6 +43,10 @@ class GPUCommonAlgorithm GPUd() static void sortInBlock(T* begin, T* end, const S& comp); template GPUd() static void sortDeviceDynamic(T* begin, T* end, const S& comp); +#ifndef __OPENCL__ + template + GPUh() static void sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); +#endif template GPUd() static void swap(T& a, T& b); diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 0208c12f1cd08..f37445ede9c84 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -87,6 +87,14 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& co thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd, comp); } +template +GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp) +{ + thrust::device_ptr p(begin); + auto alloc = rec->getThrustVolatileDeviceAllocator(); + thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(rec->mInternals->Streams[stream]), p, p + N, comp); +} + } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index f78270d40146c..30bbc76d4c415 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -54,6 +54,8 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase void getRTCKernelCalls(std::vector& kernels); + template + friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* 
begin, size_t N, const S& comp); GPUReconstructionCUDAInternals* mInternals; }; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 36a947dda9dc3..8fba328f8ac38 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -121,7 +121,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) for (uint32_t i = 0; i < NSECTORS; i++) { runKernel({{1, -WarpSize(), 0, deviceType}}, i); runKernel(GetGridAuto(0, deviceType), i); - runKernel(GetGridAuto(0, deviceType), i); + runKernel(GetGridAuto(0, deviceType), i); // TODO: Why all in stream 0? } if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({{1, -WarpSize(), 0, deviceType}}, NSECTORS); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index c57ca9e5e1436..c8c844eee748a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -772,12 +772,10 @@ struct MergeBorderTracks_compMin { template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { - thrust::device_ptr p(range); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); if (cmpMax) { - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax()); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); } else { - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMin()); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin()); } } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize MergeBorderTracks<3> @@ -1877,17 +1875,13 @@ 
struct GPUTPCGMMergerSortTracksQPt_comp { template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { - thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { - thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt @@ -2110,9 +2104,7 @@ struct GPUTPCGMMergerMergeLoopers_comp { template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { - thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); - 
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9594391163586..8056f22484e70 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -104,10 +104,7 @@ struct GPUTPCGMO2OutputSort_comp { template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { - thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); - ; - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread From 15df642b632fea3b9bf45a47b785d3eac54da262 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 15:34:16 +0100 Subject: [PATCH 0132/1764] GPU: Switch to C++17 nested namespace style in more places --- GPU/Common/GPUCommonAlgorithm.h | 28 ++++++--------------------- GPU/Common/GPUCommonAlgorithmThrust.h | 8 ++------ GPU/Common/GPUCommonMath.h | 7 ++----- GPU/Common/GPUCommonTransform3D.h | 7 ++----- GPU/Common/GPUROOTCartesianFwd.h | 7
++----- GPU/Common/GPUROOTSMatrixFwd.h | 7 ++----- 6 files changed, 16 insertions(+), 48 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index b82c0059b9e15..4970876f39e4c 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -24,9 +24,7 @@ // ----------------------------- SORTING ----------------------------- -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUCommonAlgorithm { @@ -75,13 +73,6 @@ class GPUCommonAlgorithm template GPUd() static void IterSwap(I a, I b) noexcept; }; -} // namespace gpu -} // namespace o2 - -namespace o2 -{ -namespace gpu -{ #ifndef GPUCA_ALGORITHM_STD template @@ -221,8 +212,7 @@ GPUdi() void GPUCommonAlgorithm::QuickSort(I f, I l) noexcept typedef GPUCommonAlgorithm CAAlgo; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #if (((defined(__CUDACC__) && !defined(__clang__)) || defined(__HIPCC__))) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_HOSTONLY) @@ -230,9 +220,7 @@ typedef GPUCommonAlgorithm CAAlgo; #else -namespace o2 -{ -namespace gpu +namespace o2::gpu { template @@ -251,15 +239,12 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& co GPUCommonAlgorithm::sort(begin, end, comp); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // THRUST // sort and sortInBlock below are not taken from Thrust, since our implementations are faster -namespace o2 -{ -namespace gpu +namespace o2::gpu { template @@ -332,8 +317,7 @@ GPUdi() void GPUCommonAlgorithm::swap(T& a, T& b) } #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu // ----------------------------- WORK GROUP FUNCTIONS ----------------------------- diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index f37445ede9c84..2da4b6a4f965d 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -30,9 +30,7 @@ #define 
GPUCA_THRUST_NAMESPACE thrust::hip #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { // - Our quicksort and bubble sort implementations are faster @@ -94,8 +92,6 @@ GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begi auto alloc = rec->getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(rec->mInternals->Streams[stream]), p, p + N, comp); } - -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index b7a44c1df0f38..58f046161aa8b 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -42,9 +42,7 @@ #define GPUCA_CHOICE(c1, c2, c3) (c1) // Select first option for Host #endif // clang-format on -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUCommonMath @@ -540,7 +538,6 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt #undef GPUCA_CHOICE -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUCOMMONMATH_H diff --git a/GPU/Common/GPUCommonTransform3D.h b/GPU/Common/GPUCommonTransform3D.h index 4c5cca1f00ddc..2f517aded7eed 100644 --- a/GPU/Common/GPUCommonTransform3D.h +++ b/GPU/Common/GPUCommonTransform3D.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class Transform3D { @@ -79,7 +77,6 @@ class Transform3D kZZ = 10, kDZ = 11 }; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/Common/GPUROOTCartesianFwd.h b/GPU/Common/GPUROOTCartesianFwd.h index 89b0aa44eb78c..c631637a3bc6e 100644 --- a/GPU/Common/GPUROOTCartesianFwd.h +++ b/GPU/Common/GPUROOTCartesianFwd.h @@ -46,9 +46,7 @@ class DefaultCoordinateSystemTag; } // namespace Math } // namespace ROOT -namespace o2 -{ -namespace math_utils +namespace o2::math_utils { namespace detail @@ -79,7 +77,6 @@ template using Vector3D = detail::GPUPoint3D; #endif -} // namespace math_utils -} // 
namespace o2 +} // namespace o2::math_utils #endif diff --git a/GPU/Common/GPUROOTSMatrixFwd.h b/GPU/Common/GPUROOTSMatrixFwd.h index 44b2254949df2..0159cc8922140 100644 --- a/GPU/Common/GPUROOTSMatrixFwd.h +++ b/GPU/Common/GPUROOTSMatrixFwd.h @@ -35,9 +35,7 @@ class MatRepStd; } // namespace Math } // namespace ROOT -namespace o2 -{ -namespace math_utils +namespace o2::math_utils { namespace detail @@ -72,7 +70,6 @@ template using MatRepStd = detail::MatRepStdGPU; #endif -} // namespace math_utils -} // namespace o2 +} // namespace o2::math_utils #endif From 30efe2e219398cdefbac4ea06d431b140d042bed Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 13:56:25 +0100 Subject: [PATCH 0133/1764] GPUCommonAlgorithm: Cleanup preprocessor defines --- GPU/Common/GPUCommonAlgorithm.h | 4 ++++ GPU/Common/GPUCommonAlgorithmThrust.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index 4970876f39e4c..5c19dda27f593 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -446,4 +446,8 @@ GPUdi() T warp_broadcast(T v, int32_t i) #endif +#ifdef GPUCA_ALGORITHM_STD +#undef GPUCA_ALGORITHM_STD +#endif + #endif diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 2da4b6a4f965d..6bf605a370050 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -52,7 +52,7 @@ GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end, const S& comp) } template -GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end) +GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end) // TODO: Try cub::BlockMergeSort { if (get_local_id(0) == 0) { sortDeviceDynamic(begin, end); @@ -94,4 +94,6 @@ GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begi } } // namespace o2::gpu +#undef GPUCA_THRUST_NAMESPACE + #endif From 7a706ae53b64cfddf58b69f231c13246b11dba26 Mon 
Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 16:14:36 +0100 Subject: [PATCH 0134/1764] GPUCommonAlgorithm: Use CUB for sorting on device instead of Thrust --- GPU/Common/GPUCommonAlgorithmThrust.h | 18 ++++++++++++++++-- GPU/GPUTracking/Base/GPUGeneralKernels.h | 14 +++++++------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 6bf605a370050..049071227a58e 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -23,11 +23,16 @@ #pragma GCC diagnostic pop #include "GPUCommonDef.h" +#include "GPUCommonHelpers.h" -#ifdef __CUDACC__ +#ifndef __HIPCC__ // CUDA #define GPUCA_THRUST_NAMESPACE thrust::cuda -#else +#define GPUCA_CUB_NAMESPACE cub +#include +#else // HIP #define GPUCA_THRUST_NAMESPACE thrust::hip +#define GPUCA_CUB_NAMESPACE hipcub +#include #endif namespace o2::gpu @@ -89,11 +94,20 @@ template GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp) { thrust::device_ptr p(begin); +#if 0 // Use Thrust auto alloc = rec->getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(rec->mInternals->Streams[stream]), p, p + N, comp); +#else // Use CUB + size_t tempSize = 0; + void* tempMem = nullptr; + GPUChkErrS(GPUCA_CUB_NAMESPACE::DeviceMergeSort::SortKeys(tempMem, tempSize, begin, N, comp, rec->mInternals->Streams[stream])); + tempMem = rec->AllocateVolatileDeviceMemory(tempSize); + GPUChkErrS(GPUCA_CUB_NAMESPACE::DeviceMergeSort::SortKeys(tempMem, tempSize, begin, N, comp, rec->mInternals->Streams[stream])); +#endif } } // namespace o2::gpu #undef GPUCA_THRUST_NAMESPACE +#undef GPUCA_CUB_NAMESPACE #endif diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index ce93e2e5eead8..eb816c91f5909 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++
b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -27,9 +27,9 @@ #endif #if defined(__HIPCC__) -#define GPUCA_CUB hipcub +#define GPUCA_CUB_NAMESPACE hipcub #else -#define GPUCA_CUB cub +#define GPUCA_CUB_NAMESPACE cub #endif namespace o2::gpu @@ -54,7 +54,7 @@ class GPUKernelTemplate struct GPUSharedMemoryWarpScan64 { // Provides the shared memory resources for warp wide CUB collectives #if (defined(__CUDACC__) || defined(__HIPCC__)) && defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_HOSTONLY) - typedef GPUCA_CUB::WarpScan WarpScan; + typedef GPUCA_CUB_NAMESPACE::WarpScan WarpScan; union { typename WarpScan::TempStorage cubWarpTmpMem; }; @@ -65,9 +65,9 @@ class GPUKernelTemplate struct GPUSharedMemoryScan64 { // Provides the shared memory resources for CUB collectives #if (defined(__CUDACC__) || defined(__HIPCC__)) && defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_HOSTONLY) - typedef GPUCA_CUB::BlockScan BlockScan; - typedef GPUCA_CUB::BlockReduce BlockReduce; - typedef GPUCA_CUB::WarpScan WarpScan; + typedef GPUCA_CUB_NAMESPACE::BlockScan BlockScan; + typedef GPUCA_CUB_NAMESPACE::BlockReduce BlockReduce; + typedef GPUCA_CUB_NAMESPACE::WarpScan WarpScan; union { typename BlockScan::TempStorage cubTmpMem; typename BlockReduce::TempStorage cubReduceTmpMem; @@ -110,6 +110,6 @@ class GPUitoa : public GPUKernelTemplate } // namespace o2::gpu -#undef GPUCA_CUB +#undef GPUCA_CUB_NAMESPACE #endif From 3e56e5536e6e20e0ae89bc4dd3b55a60dd4fdf17 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 18:24:23 +0100 Subject: [PATCH 0135/1764] GPU: Improve synchronization during track-merging, no need to serialize the last kernel --- .../Global/GPUChainTrackingMerger.cxx | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 8fba328f8ac38..ffab3ba0be063 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -50,19 +50,13 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks(); runKernel({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0); runKernel({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1); - deviceEvent* e = nullptr; - int32_t ne = 0; - if (i == n - 1) { // Synchronize all execution on stream 0 with the last kernel - ne = std::min(n, mRec->NStreams()); - for (int32_t j = 1; j < ne; j++) { - RecordMarker(&mEvents->sector[j], j); - } - e = &mEvents->sector[1]; - ne--; - stream = 0; - } - runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, e, ne}}, i, withinSector, mergeMode); + runKernel({GetGridAuto(stream, deviceType)}, i, withinSector, mergeMode); + } + int32_t ne = std::min(n, mRec->NStreams()) - 1; // Stream 0 must wait for all streams, Note n > 1 + for (int32_t j = 0; j < ne; j++) { + RecordMarker(&mEvents->sector[j], j + 1); } + StreamWaitForEvents(0, &mEvents->sector[0], ne); } else { for (uint32_t i = 0; i < n; i++) { runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); From 8f6726b8474012cf20bbda11ed3af125ec8b7c33 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 11:26:44 +0100 Subject: [PATCH 0136/1764] GPU: Change GPUCA_DETERMINISTIC_MODE define to GPUCA_DETERMINISTIC_CODE macro, that can be used also in RTC --- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 9 +- GPU/Common/GPUCommonDef.h | 10 +- GPU/Common/GPUCommonMath.h | 54 +++--- .../Definitions/GPUDefGPUParameters.h | 9 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 160 +++++++----------- GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- GPU/GPUTracking/dEdx/GPUdEdx.cxx | 4 +- GPU/GPUTracking/dEdx/GPUdEdx.h | 14 +- 10 
files changed, 116 insertions(+), 150 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index a66eba7c3bacb..583452d0c429c 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -43,13 +43,8 @@ #define THRUST_NAMESPACE thrust::hip #endif -#ifdef GPUCA_DETERMINISTIC_MODE -#define GPU_BLOCKS 1 -#define GPU_THREADS 1 -#else -#define GPU_BLOCKS 99999 -#define GPU_THREADS 99999 -#endif +#define GPU_BLOCKS GPUCA_DETERMINISTIC_CODE(1, 99999) +#define GPU_THREADS GPUCA_DETERMINISTIC_CODE(1, 99999) // O2 track model #include "ReconstructionDataFormats/Track.h" diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index b4a788e66a81c..78da104a0c029 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -68,10 +68,18 @@ #define GPUCA_DEBUG_STREAMER_CHECK(...) #endif -#ifndef GPUCA_RTC_SPECIAL_CODE +#ifndef GPUCA_RTC_SPECIAL_CODE // By default, we ignore special RTC code #define GPUCA_RTC_SPECIAL_CODE(...) 
#endif +#ifndef GPUCA_DETERMINISTIC_CODE + #ifdef GPUCA_DETERMINISTIC_MODE + #define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path + #else + #define GPUCA_DETERMINISTIC_CODE(det, indet) indet // otherwise the fast default code path + #endif +#endif + // API Definitions for GPU Compilation #include "GPUCommonDefAPI.h" diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 58f046161aa8b..6d97250e7f2f4 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -248,7 +248,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) #endif } -#ifdef GPUCA_DETERMINISTIC_MODE +GPUCA_DETERMINISTIC_CODE( // clang-format off GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } @@ -264,7 +264,7 @@ GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } -#else +, // !GPUCA_DETERMINISTIC_CODE GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } @@ -280,20 +280,22 @@ GPUdi() constexpr float GPUCommonMath::Log(float x) { return 
GPUCA_CHOICE(logf(x GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); } GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return true; } GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; } -#endif +) // clang-format on GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) { -#if defined(GPUCA_DETERMINISTIC_MODE) && !defined(__OPENCL__) - s = sin((double)x); - c = cos((double)x); -#elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__) - __sincosf(x, &s, &c); + GPUCA_DETERMINISTIC_CODE( // clang-format off + s = sin((double)x); + c = cos((double)x); + , // !GPUCA_DETERMINISTIC_CODE +#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__) + __sincosf(x, &s, &c); #elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE)) - sincosf(x, &s, &c); + sincosf(x, &s, &c); #else - GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c)); + GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c)); #endif + ) // clang-format on } GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) @@ -390,22 +392,26 @@ GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, GPUdi() float GPUCommonMath::InvSqrt(float _x) { -#if defined(GPUCA_DETERMINISTIC_MODE) || defined(__OPENCL__) - return 1.f / Sqrt(_x); -#elif defined(__CUDACC__) || defined(__HIPCC__) - return __frsqrt_rn(_x); -#elif defined(__FAST_MATH__) - return 1.f / sqrtf(_x); + GPUCA_DETERMINISTIC_CODE( // clang-format off + return 1.f / Sqrt(_x); + , // !GPUCA_DETERMINISTIC_CODE +#if defined(__CUDACC__) || defined(__HIPCC__) + return __frsqrt_rn(_x); +#elif defined(__OPENCL__) && defined(__clang__) + return 1.f / sqrt(_x); +#elif !defined(__OPENCL__) && (defined(__FAST_MATH__) || defined(__clang__)) + return 1.f / sqrtf(_x); #else - union { - float f; - int32_t i; - } x = {_x}; - 
const float xhalf = 0.5f * x.f; - x.i = 0x5f3759df - (x.i >> 1); - x.f = x.f * (1.5f - xhalf * x.f * x.f); - return x.f; + union { + float f; + int32_t i; + } x = {_x}; + const float xhalf = 0.5f * x.f; + x.i = 0x5f3759df - (x.i >> 1); + x.f = x.f * (1.5f - xhalf * x.f * x.f); + return x.f; #endif + ) // clang-format on } template <> diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 6d6645850408f..5b5a89cc8bc39 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -25,6 +25,7 @@ #error Please include GPUDef.h #endif +#include "GPUCommonDef.h" #include "GPUDefMacros.h" // GPU Run Configuration @@ -566,12 +567,8 @@ #ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float #endif -#ifdef GPUCA_DETERMINISTIC_MODE -#undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float -#undef GPUCA_DEDX_STORAGE_TYPE -#define GPUCA_DEDX_STORAGE_TYPE float -#endif +#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) +#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) #ifndef GPUCA_WARP_SIZE #ifdef GPUCA_GPUCODE diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index c8c844eee748a..288a24dee5d99 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -723,17 +723,9 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea if (iThread == 0) { if (iBlock == 0) { -#ifdef GPUCA_DETERMINISTIC_MODE - GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId); }); -#else - GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; }); -#endif + GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); }); } else if (iBlock == 1) { -#ifdef GPUCA_DETERMINISTIC_MODE - GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); }); -#else - GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; }); -#endif + GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); }); } } #else @@ -749,21 +741,13 @@ namespace // anonymous struct MergeBorderTracks_compMax { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { -#ifdef GPUCA_DETERMINISTIC_MODE - return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); -#else - return a.fMax < b.fMax; -#endif + return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); } }; struct MergeBorderTracks_compMin { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { -#ifdef GPUCA_DETERMINISTIC_MODE - return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); -#else - return a.fMin < b.fMin; -#endif + return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); } }; } // anonymous namespace @@ -904,11 +888,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea mTrackLinks[b1.TrackID()] = iBest2; if (mergeMode > 0) { -#ifdef GPUCA_DETERMINISTIC_MODE - CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()); -#else - mTrackLinks[iBest2] = b1.TrackID(); -#endif + GPUCA_DETERMINISTIC_CODE(CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()), mTrackLinks[iBest2] = b1.TrackID()); } } // GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged); @@ -1467,14 +1447,7 @@ struct GPUTPCGMMerger_CompareClusterIdsLooper { if (a1.row != b1.row) { return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards); } -#ifdef GPUCA_DETERMINISTIC_MODE - if (a1.id != b1.id) { - return (a1.id > b1.id); - } - return aa > bb; -#else - return a1.id > b1.id; -#endif + return GPUCA_DETERMINISTIC_CODE((a1.id != b1.id) ? (a1.id > b1.id) : (aa > bb), a1.id > b1.id); } }; @@ -1488,14 +1461,7 @@ struct GPUTPCGMMerger_CompareClusterIds { if (a.row != b.row) { return (a.row > b.row); } -#ifdef GPUCA_DETERMINISTIC_MODE - if (a.id != b.id) { - return (a.id > b.id); - } - return aa > bb; -#else - return (a.id > b.id); -#endif + return GPUCA_DETERMINISTIC_CODE((a.id != b.id) ? 
(a.id > b.id) : (aa > bb), a.id > b.id); } }; } // anonymous namespace @@ -1567,20 +1533,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread // unpack and sort clusters if (nParts > 1 && leg == 0) { GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { -#ifdef GPUCA_DETERMINISTIC_MODE - if (a->X() != b->X()) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a->X() != b->X()) { + return (a->X() > b->X()); + } + if (a->Y() != b->Y()) { + return (a->Y() > b->Y()); + } + if (a->Z() != b->Z()) { + return (a->Z() > b->Z()); + } + return a->QPt() > b->QPt(); + , // !GPUCA_DETERMINISTIC_CODE return (a->X() > b->X()); - } - if (a->Y() != b->Y()) { - return (a->Y() > b->Y()); - } - if (a->Z() != b->Z()) { - return (a->Z() > b->Z()); - } - return a->QPt() > b->QPt(); -#else - return (a->X() > b->X()); -#endif + ) // clang-format on }); } @@ -1832,20 +1798,18 @@ struct GPUTPCGMMergerSortTracks_comp { if (a.Legs() != b.Legs()) { return a.Legs() > b.Legs(); } -#ifdef GPUCA_DETERMINISTIC_MODE - if (a.NClusters() != b.NClusters()) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE return a.NClusters() > b.NClusters(); - } - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } - if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; -#else - return a.NClusters() > b.NClusters(); -#endif + ) // 
clang-format on } }; @@ -1856,17 +1820,16 @@ struct GPUTPCGMMergerSortTracksQPt_comp { { const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; -#ifdef GPUCA_DETERMINISTIC_MODE - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } - if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); -#else - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); -#endif + ) // clang-format on } }; } // anonymous namespace @@ -1901,20 +1864,18 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ if (a.Legs() != b.Legs()) { return a.Legs() > b.Legs(); } -#ifdef GPUCA_DETERMINISTIC_MODE - if (a.NClusters() != b.NClusters()) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE return a.NClusters() > b.NClusters(); - } - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > 
CAMath::Abs(b.GetParam().GetQPt()); - } - if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; -#else - return a.NClusters() > b.NClusters(); -#endif + ) // clang-format on }; GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp); @@ -1931,17 +1892,16 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; -#ifdef GPUCA_DETERMINISTIC_MODE - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } - if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); -#else - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); -#endif + ) // clang-format on }; GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h index 4e225a61661c2..238b04510862e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h @@ -32,7 +32,7 @@ enum attachTypes { attachAttached = 0x40000000, struct InterpolationErrorHit { float posY, posZ; - 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE errorY, errorZ; + GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ; }; struct InterpolationErrors { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 0c171a74d4e42..f1aac3da9a7a2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -663,7 +663,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, GPUCA_DEBUG_STREAMER_CHECK(if (debugVals) { debugVals->err2Y = err2Y; debugVals->err2Z = err2Z; }); if (rejectChi2 >= rejectInterFill) { - if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE)0) { + if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { rejectChi2 = rejectDirect; } else { int32_t retVal = InterpolateReject(param, posY, posZ, clusterState, rejectChi2, inter, err2Y, err2Z); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index d235b3398c062..29524fb80ace0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -308,7 +308,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } else { int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? 
(GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); #if EXTRACT_RESIDUALS == 1 - if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE)0) { + if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { const float Iz0 = interpolation.hit[ihit].posY - mP[0]; const float Iz1 = interpolation.hit[ihit].posZ - mP[1]; float Iw0 = mC[2] + (float)interpolation.hit[ihit].errorZ; diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.cxx b/GPU/GPUTracking/dEdx/GPUdEdx.cxx index b7da0de4c0e29..fd2aeda2828e3 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.cxx +++ b/GPU/GPUTracking/dEdx/GPUdEdx.cxx @@ -55,7 +55,7 @@ GPUd() void GPUdEdx::computedEdx(GPUdEdxInfo& GPUrestrict() output, const GPUPar output.NHitsSubThresholdOROC3 = countOROC3; } -GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) +GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) { trunclow = count * trunclow / 128; trunchigh = count * trunchigh / 128; @@ -65,7 +65,7 @@ GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE* GPUrestrict() ar CAAlgo::sort(array, array + count); float mean = 0; for (int32_t i = trunclow; i < trunchigh; i++) { - mean += (float)array[i] * (1.f / scalingFactor::factor); + mean += (float)array[i] * (1.f / scalingFactor::factor); } return (mean / (trunchigh - trunclow)); } diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index bcd75af468c28..4d3b652bdc5d1 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -37,7 +37,7 @@ class GPUdEdx GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); private: - GPUd() float GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE* array, int32_t count, int32_t trunclow, int32_t trunchigh); + 
GPUd() float GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* array, int32_t count, int32_t trunclow, int32_t trunchigh); GPUd() void checkSubThresh(int32_t roc); template @@ -62,8 +62,8 @@ class GPUdEdx static constexpr int32_t MAX_NCL = GPUCA_ROW_COUNT; // Must fit in mNClsROC (uint8_t)! - GPUCA_DEDX_STORAGE_TYPE mChargeTot[MAX_NCL]; // No need for default, just some memory - GPUCA_DEDX_STORAGE_TYPE mChargeMax[MAX_NCL]; // No need for default, just some memory + GPUCA_DEDX_STORAGE_TYPE_A mChargeTot[MAX_NCL]; // No need for default, just some memory + GPUCA_DEDX_STORAGE_TYPE_A mChargeMax[MAX_NCL]; // No need for default, just some memory float mSubThreshMinTot = 0.f; float mSubThreshMinMax = 0.f; uint8_t mNClsROC[4] = {0}; @@ -78,8 +78,8 @@ GPUdi() void GPUdEdx::checkSubThresh(int32_t roc) if (roc != mLastROC) { if (mNSubThresh && mCount + mNSubThresh <= MAX_NCL) { for (int32_t i = 0; i < mNSubThresh; i++) { - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); } mNClsROC[mLastROC] += mNSubThresh; mNClsROCSubThresh[mLastROC] += mNSubThresh; @@ -151,8 +151,8 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qmax /= residualGainMapGain; qtot /= residualGainMapGain; - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE)(qtot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE)(qmax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(qtot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(qmax * 
scalingFactor::factor + scalingFactor::round); mNClsROC[roc]++; if (qtot < mSubThreshMinTot) { mSubThreshMinTot = qtot; From d4a5ca78d81244bad4709591011aed486600b248 Mon Sep 17 00:00:00 2001 From: swenzel Date: Tue, 25 Mar 2025 13:02:28 +0100 Subject: [PATCH 0137/1764] GeneratorHybrid: improve unit treatment So far, units are treated solely in the Generator::addTrack function. This works well for fundamental generators. However, the hybrid generator is a meta generator potentially consisting of a collection of underlying generators that may have completely different units. This may currently lead to wrong generator output, in certain cases. This commit fixes these bugs and introduces unit handling within GeneratorHybrid. --- Generators/include/Generators/Generator.h | 4 ++ .../include/Generators/GeneratorHybrid.h | 1 - Generators/src/GeneratorHybrid.cxx | 60 +++++++++++++++---- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/Generators/include/Generators/Generator.h b/Generators/include/Generators/Generator.h index 640cc80931862..bd35a00793e2d 100644 --- a/Generators/include/Generators/Generator.h +++ b/Generators/include/Generators/Generator.h @@ -78,9 +78,13 @@ class Generator : public FairGenerator /** setters **/ void setMomentumUnit(double val) { mMomentumUnit = val; }; + double getMomentumUnit() const { return mMomentumUnit; } void setEnergyUnit(double val) { mEnergyUnit = val; }; + double getEnergyUnit() const { return mEnergyUnit; } void setPositionUnit(double val) { mPositionUnit = val; }; + double getPositionUnit() const { return mPositionUnit; } void setTimeUnit(double val) { mTimeUnit = val; }; + double getTimeUnit() const { return mTimeUnit; } void setBoost(Double_t val) { mBoost = val; }; void setTriggerMode(ETriggerMode_t val) { mTriggerMode = val; }; void addTrigger(Trigger trigger) { mTriggers.push_back(trigger); }; diff --git a/Generators/include/Generators/GeneratorHybrid.h b/Generators/include/Generators/GeneratorHybrid.h index 
955240d6a28fa..b92437b02d874 100644 --- a/Generators/include/Generators/GeneratorHybrid.h +++ b/Generators/include/Generators/GeneratorHybrid.h @@ -54,7 +54,6 @@ class GeneratorHybrid : public Generator { public: - GeneratorHybrid() = default; GeneratorHybrid(const std::string& inputgens); ~GeneratorHybrid(); diff --git a/Generators/src/GeneratorHybrid.cxx b/Generators/src/GeneratorHybrid.cxx index 729d69527c384..83a694703c259 100644 --- a/Generators/src/GeneratorHybrid.cxx +++ b/Generators/src/GeneratorHybrid.cxx @@ -25,6 +25,12 @@ namespace eventgen GeneratorHybrid::GeneratorHybrid(const std::string& inputgens) { + // This generator has trivial unit conversions + setTimeUnit(1.); + setPositionUnit(1.); + setMomentumUnit(1.); + setEnergyUnit(1.); + if (!parseJSON(inputgens)) { LOG(fatal) << "Failed to parse JSON configuration from input generators"; exit(1); @@ -382,6 +388,27 @@ bool GeneratorHybrid::importParticles() } } } + + auto unit_transformer = [](auto& p, auto pos_unit, auto time_unit, auto en_unit, auto mom_unit) { + p.SetMomentum(p.Px() * mom_unit, p.Py() * mom_unit, p.Pz() * mom_unit, p.Energy() * en_unit); + p.SetProductionVertex(p.Vx() * pos_unit, p.Vy() * pos_unit, p.Vz() * pos_unit, p.T() * time_unit); + }; + + auto index_transformer = [](auto& p, int offset) { + for (int i = 0; i < 2; ++i) { + if (p.GetMother(i) != -1) { + const auto newindex = p.GetMother(i) + offset; + p.SetMother(i, newindex); + } + } + if (p.GetNDaughters() > 0) { + for (int i = 0; i < 2; ++i) { + const auto newindex = p.GetDaughter(i) + offset; + p.SetDaughter(i, newindex); + } + } + }; + // Clear particles and event header mParticles.clear(); mMCEventHeader.clearInfo(); @@ -391,23 +418,20 @@ bool GeneratorHybrid::importParticles() LOG(info) << "Importing particles for task " << subIndex; auto subParticles = gens[subIndex]->getParticles(); + auto time_unit = gens[subIndex]->getTimeUnit(); + auto pos_unit = gens[subIndex]->getPositionUnit(); + auto mom_unit = 
gens[subIndex]->getMomentumUnit(); + auto energy_unit = gens[subIndex]->getEnergyUnit(); + // The particles carry mother and daughter indices, which are relative // to the sub-generator. We need to adjust these indices to reflect that particles // are now embedded into a cocktail. auto offset = mParticles.size(); for (auto& p : subParticles) { - for (int i = 0; i < 2; ++i) { - if (p.GetMother(i) != -1) { - const auto newindex = p.GetMother(i) + offset; - p.SetMother(i, newindex); - } - } - if (p.GetNDaughters() > 0) { - for (int i = 0; i < 2; ++i) { - const auto newindex = p.GetDaughter(i) + offset; - p.SetDaughter(i, newindex); - } - } + // apply the mother-daugher index transformation + index_transformer(p, offset); + // apply unit transformation of sub-generator + unit_transformer(p, pos_unit, time_unit, energy_unit, mom_unit); } mParticles.insert(mParticles.end(), subParticles.begin(), subParticles.end()); @@ -420,6 +444,18 @@ bool GeneratorHybrid::importParticles() LOG(info) << "Importing particles for task " << genIndex; // at this moment the mIndex-th generator is ready to be used mParticles = gens[genIndex]->getParticles(); + + auto time_unit = gens[genIndex]->getTimeUnit(); + auto pos_unit = gens[genIndex]->getPositionUnit(); + auto mom_unit = gens[genIndex]->getMomentumUnit(); + auto energy_unit = gens[genIndex]->getEnergyUnit(); + + // transform units to units of the hybrid generator + for (auto& p : mParticles) { + // apply unit transformation + unit_transformer(p, pos_unit, time_unit, energy_unit, mom_unit); + } + // fetch the event Header information from the underlying generator gens[genIndex]->updateHeader(&mMCEventHeader); mInputTaskQueue.push(genIndex); From 614112ed251d225b96f7476104f427568b359f8e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 16:20:42 +0100 Subject: [PATCH 0138/1764] GPU RTC: Add deterministic mode --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 18 ++++++++++++------ .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx 
| 3 +++ .../Base/cuda/GPUReconstructionCUDArtc.cu | 1 + GPU/GPUTracking/Base/hip/CMakeLists.txt | 18 ++++++++++++------ GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 5 files changed, 29 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 99c59afd2011a..10b37496a2a62 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -85,8 +85,7 @@ add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR} add_custom_command( OUTPUT ${GPU_RTC_BIN}.command COMMAND echo -n "${CMAKE_CUDA_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -fatbin" > ${GPU_RTC_BIN}.command - COMMAND_EXPAND_LISTS - VERBATIM + COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing CUDA RTC command file ${GPU_RTC_BIN}.command" ) create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) @@ -94,13 +93,20 @@ create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) add_custom_command( OUTPUT ${GPU_RTC_BIN}.command.arch COMMAND echo -n "${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch - COMMAND_EXPAND_LISTS - VERBATIM - COMMENT "Preparing CUDA RTC ARCH file ${GPU_RTC_BIN}.command.arch" + COMMAND_EXPAND_LISTS VERBATIM + COMMENT "Preparing CUDA RTC ARCH command file ${GPU_RTC_BIN}.command.arch" ) create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o) -set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o) +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.command.no_fast_math + COMMAND echo -n "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" > ${GPU_RTC_BIN}.command.no_fast_math + COMMAND_EXPAND_LISTS VERBATIM + COMMENT "Preparing CUDA RTC NO_FAST_MATH command file ${GPU_RTC_BIN}.command.arch" +) +create_binary_resource(${GPU_RTC_BIN}.command.no_fast_math ${GPU_RTC_BIN}.command.no_fast_math.o) + +set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o 
${GPU_RTC_BIN}.command.arch.o ${GPU_RTC_BIN}.command.no_fast_math.o) # -------------------------------- End RTC ------------------------------------------------------- if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 1a4721035818e..e789dc9b9ebc3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -29,11 +29,13 @@ using namespace o2::gpu; QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_src); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch); +QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_no_fast_math); int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + + std::string(mProcessingSettings.rtc.deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n") + GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; @@ -52,6 +54,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) std::string baseCommand = (mProcessingSettings.RTCprependCommand != "" ? (mProcessingSettings.RTCprependCommand + " ") : ""); baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len)); baseCommand += std::string(" ") + (mProcessingSettings.RTCoverrideArchitecture != "" ? 
mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); + baseCommand += mProcessingSettings.rtc.deterministic ? (std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len)) : std::string(""); char shasource[21], shaparam[21], shacmd[21], shakernels[21]; if (mProcessingSettings.rtc.cacheOutput) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 31f9c54c5e7f8..2b6ec52f25831 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -15,6 +15,7 @@ #define GPUCA_GPUCODE_GENRTC #define GPUCA_GPUCODE_COMPILEKERNELS #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) +#define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) #include "GPUReconstructionCUDADef.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 16e6e72d56e9a..33963d72be9ab 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -123,8 +123,7 @@ add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ add_custom_command( OUTPUT ${GPU_RTC_BIN}.command COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command - COMMAND_EXPAND_LISTS - VERBATIM + COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command" ) create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) @@ -132,13 +131,20 @@ create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) add_custom_command( OUTPUT ${GPU_RTC_BIN}.command.arch COMMAND echo -n 
"${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch - COMMAND_EXPAND_LISTS - VERBATIM - COMMENT "Preparing HIP RTC ARCH file ${GPU_RTC_BIN}.command.arch" + COMMAND_EXPAND_LISTS VERBATIM + COMMENT "Preparing HIP RTC ARCH command file ${GPU_RTC_BIN}.command.arch" ) create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o) -set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o) +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.command.no_fast_math + COMMAND echo -n "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" > ${GPU_RTC_BIN}.command.no_fast_math + COMMAND_EXPAND_LISTS VERBATIM + COMMENT "Preparing HIP RTC NO_FAST_MATH command file ${GPU_RTC_BIN}.command.no_fast_math" +) +create_binary_resource(${GPU_RTC_BIN}.command.no_fast_math ${GPU_RTC_BIN}.command.no_fast_math.o) + +set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o ${GPU_RTC_BIN}.command.no_fast_math.o) # -------------------------------- End RTC ------------------------------------------------------- if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index a1f650a2bc56e..e0c5f845a475e 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -208,6 +208,7 @@ BeginSubConfig(GPUSettingsProcessingRTC, rtc, configStandalone.proc, "RTC", 0, " AddOption(cacheOutput, bool, false, "", 0, "Cache RTC compilation results") AddOption(optConstexpr, bool, true, "", 0, "Replace constant variables by static constexpr expressions") AddOption(optSpecialCode, int8_t, -1, "", 0, "Insert GPUCA_RTC_SPECIAL_CODE special code during RTC") +AddOption(deterministic, bool, false, "", 0, "Compile RTC in deterministic mode, with NO_FAST_MATH flags and GPUCA_DETERMINISTIC_MODE define") AddOption(compilePerKernel, bool, true, "", 0, "Run one RTC compilation per kernel") AddOption(enable, bool, false, "", 0, 
"Use RTC to optimize GPU code") AddOption(runTest, int32_t, 0, "", 0, "Do not run the actual benchmark, but just test RTC compilation (1 full test, 2 test only compilation)") From 8459848bd63c60c36b8b455fd830785f1f239dfe Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 17:25:00 +0100 Subject: [PATCH 0139/1764] GPU RTC: Some cosmetic CMake changes to make RTC command lines more homogeneous --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu | 1 + GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 10b37496a2a62..c1cc63aa0fc02 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -84,7 +84,7 @@ add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR} add_custom_command( OUTPUT ${GPU_RTC_BIN}.command - COMMAND echo -n "${CMAKE_CUDA_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -fatbin" > ${GPU_RTC_BIN}.command + COMMAND echo -n "${CMAKE_CUDA_COMPILER} -forward-unknown-to-host-compiler ${GPU_RTC_DEFINES} ${GPU_RTC_FLAGS_SEPARATED} -x cu -fatbin" > ${GPU_RTC_BIN}.command COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing CUDA RTC command file ${GPU_RTC_BIN}.command" ) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 2b6ec52f25831..919b5c11477ef 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -16,6 +16,7 @@ #define GPUCA_GPUCODE_COMPILEKERNELS #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) 
GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) +// GPUReconstructionCUDAIncludesHost.h auto-prependended without preprocessor running #include "GPUReconstructionCUDADef.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 33963d72be9ab..dd47f4fe8e78b 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -122,7 +122,7 @@ add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ add_custom_command( OUTPUT ${GPU_RTC_BIN}.command - COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command + COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_FLAGS_SEPARATED} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command" ) From 9e23cd32483a055c5770c572bc3bccb54d6be913 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 17:25:15 +0100 Subject: [PATCH 0140/1764] GPU RTC: Fix float precision for constexpr optimization --- GPU/GPUTracking/utils/qconfig.h | 12 ++++++------ GPU/GPUTracking/utils/qconfig_helpers.h | 16 +++++++++++----- GPU/GPUTracking/utils/qconfigrtc.h | 1 + 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/GPU/GPUTracking/utils/qconfig.h b/GPU/GPUTracking/utils/qconfig.h index 79a9bd757b531..bc755e583c3b7 100644 --- a/GPU/GPUTracking/utils/qconfig.h +++ b/GPU/GPUTracking/utils/qconfig.h @@ -250,12 +250,12 @@ enum qConfigRetVal { qcrOK = 0, #define AddVariable(name, type, default) out << qon_mxstr(type) << " " << qon_mxstr(name) << ";\n"; #define AddOptionArray(name, type, count, default, optname, optnameshort, help, ...) out << qon_mxstr(type) << " " << qon_mxstr(name) << "[" << qon_mxstr(count) << "];\n"; #define AddOptionVec(name, type, optname, optnameshort, help, ...) 
out << "std::vector<" << qon_mxstr(type) << "> " << qon_mxstr(name) << ";\n"; -#define AddVariableRTC(name, type, default) \ - if (useConstexpr) { \ - out << "static constexpr " << qon_mxstr(type) << " " << qon_mxstr(name) << " = " << qConfig::print_type(std::get(tSrc)->name) << ";\n"; \ - out << qon_mxstr(type) << " " << qon_mxstr(qon_mxcat(_dummy_, name)) << ";\n"; \ - } else { \ - AddOption(name, type, default, optname, optnameshort, help); \ +#define AddVariableRTC(name, type, default) \ + if (useConstexpr) { \ + out << "static constexpr " << qon_mxstr(type) << " " << qon_mxstr(name) << " = " << qConfig::print_type(std::get(tSrc)->name, true) << ";\n"; \ + out << qon_mxstr(type) << " " << qon_mxstr(qon_mxcat(_dummy_, name)) << ";\n"; \ + } else { \ + AddOption(name, type, default, optname, optnameshort, help); \ } #define AddOptionRTC(name, type, default, optname, optnameshort, help, ...) AddVariableRTC(name, type, default) #define AddOptionArrayRTC(name, type, count, default, optname, optnameshort, help, ...) 
\ diff --git a/GPU/GPUTracking/utils/qconfig_helpers.h b/GPU/GPUTracking/utils/qconfig_helpers.h index e721f08ccfa90..51c89b759e9cf 100644 --- a/GPU/GPUTracking/utils/qconfig_helpers.h +++ b/GPU/GPUTracking/utils/qconfig_helpers.h @@ -17,6 +17,7 @@ #include #include +#include #define qon_mcat(a, b) a##b #define qon_mxcat(a, b) qon_mcat(a, b) @@ -30,29 +31,34 @@ namespace qConfig { template -inline std::string print_type(T val) +inline std::string print_type(T val, bool precise = false) { std::ostringstream s; + if constexpr (std::is_same_v || std::is_same_v) { + if (precise) { + s << std::hexfloat; + } + } s << val; return s.str(); }; template <> -inline std::string print_type(char val) +inline std::string print_type(char val, bool precise) { return std::to_string(val); }; template <> -inline std::string print_type(int8_t val) +inline std::string print_type(int8_t val, bool precise) { return std::to_string(val); }; template <> -inline std::string print_type(uint8_t val) +inline std::string print_type(uint8_t val, bool precise) { return std::to_string(val); }; template <> -inline std::string print_type(bool val) +inline std::string print_type(bool val, bool precise) { return val ? 
"true" : "false"; }; diff --git a/GPU/GPUTracking/utils/qconfigrtc.h b/GPU/GPUTracking/utils/qconfigrtc.h index 97365a6412c5d..8d33ef0ac6355 100644 --- a/GPU/GPUTracking/utils/qconfigrtc.h +++ b/GPU/GPUTracking/utils/qconfigrtc.h @@ -31,6 +31,7 @@ template static std::string qConfigPrintRtc(const T& tSrc, bool useConstexpr) { std::stringstream out; + out << std::hexfloat; #define QCONFIG_PRINT_RTC #include "qconfig.h" #undef QCONFIG_PRINT_RTC From 2ddad04f95a74fda82460408df82a3ae8c35aae8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 21:38:52 +0100 Subject: [PATCH 0141/1764] GPU CMake: Clean up and collect files / kernels to be compiled in deterministic mode in one place --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 14 +++------- GPU/GPUTracking/Base/hip/CMakeLists.txt | 14 +++------- GPU/GPUTracking/CMakeLists.txt | 26 +++++++++++-------- .../Standalone/Benchmark/CMakeLists.txt | 3 +-- GPU/GPUTracking/cmake/kernel_helpers.cmake | 23 ++++++++++++++++ GPU/GPUTracking/display/CMakeLists.txt | 3 ++- 6 files changed, 47 insertions(+), 36 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index c1cc63aa0fc02..de54f09fdc2e1 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -122,8 +122,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${CMAKE_CURRENT_SOURCE_DIR} TARGETVARNAME targetName) - target_compile_definitions(${targetName} PUBLIC $) - install(FILES ${HDRS} DESTINATION include/GPU) endif() @@ -131,11 +129,14 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName "${MODULE}") set(TMP_BASELIB GPUTracking) add_library(${MODULE} SHARED ${SRCS}) + add_library(O2::${MODULE} ALIAS ${MODULE}) target_link_libraries(${MODULE} PUBLIC ${TMP_BASELIB}) install(TARGETS GPUTrackingCUDA) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) endif() +target_compile_definitions(${targetName} PRIVATE $) + # Setting target architecture and adding GPU libraries 
target_link_libraries(${targetName} PRIVATE cuda cudart nvrtc) set_target_cuda_arch(${targetName}) @@ -170,15 +171,6 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel") target_sources(${targetName} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o) set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o PROPERTIES EXTERNAL_OBJECT true GENERATED true) - # Disable all non-deterministic floating point to make TPC track model encoding / decoding precise - set_source_files_properties(${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCCompressionKernels_step0attached.cu - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCCompressionKernels_step1unattached.cu - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCDecompressionKernels_step0attached.cu - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCDecompressionKernels_step1unattached.cu - TARGET_DIRECTORY ${targetName} - PROPERTIES - COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc") message(FATAL_ERROR "CUDA RDC compilation of GPUReconstruction ios not yet working!") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index dd47f4fe8e78b..43259decef956 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -160,8 +160,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${GPUCA_HIP_SOURCE_DIR} TARGETVARNAME targetName) - target_compile_definitions(${targetName} PUBLIC $) - install(FILES ${HDRS} DESTINATION include/GPU) # o2_add_test(GPUsortHIP NAME test_GPUsortHIP @@ -175,11 +173,14 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName "${MODULE}") set(TMP_BASELIB GPUTracking) add_library(${MODULE} SHARED ${SRCS}) + add_library(O2::${MODULE} ALIAS ${MODULE}) target_link_libraries(${MODULE} PUBLIC ${TMP_BASELIB}) install(TARGETS GPUTrackingHIP) 
include_directories(${GPUCA_HIP_SOURCE_DIR}) endif() +target_compile_definitions(${targetName} PRIVATE $) + add_library(${MODULE}_CXX OBJECT ${SRCS_CXX}) # Adding a C++ library for the .cxx code of the HIP library, such that it does not link to HIP libraries, and CMake HIP Language doesn't add HIP compile flags. target_compile_definitions(${MODULE}_CXX PRIVATE $) target_include_directories(${MODULE}_CXX PRIVATE $) @@ -228,15 +229,6 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel") target_sources(${targetName} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o) set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o PROPERTIES EXTERNAL_OBJECT true GENERATED true) - # Disable all non-deterministic floating point to make TPC track model encoding / decoding precise - set_source_files_properties(${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCCompressionKernels_step0attached.hip - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCCompressionKernels_step1unattached.hip - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCDecompressionKernels_step0attached.hip - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCDecompressionKernels_step1unattached.hip - TARGET_DIRECTORY ${targetName} - PROPERTIES - COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "rdc") message(FATAL_ERROR "HIP RDC compilation of GPUReconstruction ios not yet working!") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index dedfcf5953394..ba2b9d05a3192 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -372,16 +372,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") endif() endif() -# Disable all non-deterministic floating point to make TPC track model encoding / decoding precise -set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx - 
DataCompression/GPUTPCCompressionKernels.cxx - DataCompression/TPCClusterDecompressor.cxx - DataCompression/GPUTPCDecompressionKernels.cxx - TARGET_DIRECTORY ${targetName} - PROPERTIES - COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") - # GPUReconstructionLibrary needs to know which GPU backends are enabled for proper error messages configure_file(Base/GPUReconstructionAvailableBackends.template.h ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionAvailableBackends.h) set_source_files_properties(Base/GPUReconstructionLibrary.cxx @@ -423,5 +413,19 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR ALIGPU_BUILD_TYPE STREQUAL "Standalone") endif() if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU}) - target_compile_definitions(${targetName} PUBLIC GPUCA_DETERMINISTIC_MODE) + target_compile_definitions(${targetName} PRIVATE GPUCA_DETERMINISTIC_MODE) endif() + +# Disable all non-deterministic floating point to make TPC track model encoding / decoding precise +set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx + DataCompression/GPUTPCCompressionKernels.cxx + DataCompression/TPCClusterDecompressor.cxx + DataCompression/GPUTPCDecompressionKernels.cxx + TARGET_DIRECTORY ${targetName} + PROPERTIES + COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") +o2_gpu_kernel_set_deterministic(GPUTPCCompressionKernels_step0attached + GPUTPCCompressionKernels_step1unattached + GPUTPCDecompressionKernels_step0attached + GPUTPCDecompressionKernels_step1unattached) diff --git a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt index e418d94b62cb2..eeafcfc44142d 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt @@ -28,10 +28,9 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_executable(ca ${SRCS}) set(targetName ca) 
target_link_libraries(${targetName} PUBLIC GPUTracking) - endif() -target_compile_definitions(${targetName} PUBLIC $) +target_compile_definitions(${targetName} PRIVATE $) if(ROOT_FOUND) target_sources(${targetName} PRIVATE ../../qa/genEvents.cxx) diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 1f35b6fc468b2..e63b915640e8a 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -142,3 +142,26 @@ function(o2_gpu_kernel_file_list list) list(REMOVE_DUPLICATES TMP_FILE_LIST) set_property(TARGET O2_GPU_KERNELS PROPERTY O2_GPU_KERNELS_FILE_LIST_${list} "${TMP_FILE_LIST}") endfunction() + +function(o2_gpu_kernel_set_deterministic) + if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU}) + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + if(CUDA_ENABLED AND (NOT DEFINED GPUCA_CUDA_COMPILE_MODE OR GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")) + set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.cu" + TARGET_DIRECTORY O2::GPUTrackingCUDA + PROPERTIES + COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") + endif() + if(HIP_ENABLED AND (NOT DEFINED GPUCA_HIP_COMPILE_MODE OR GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")) + set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.hip" + TARGET_DIRECTORY O2::GPUTrackingHIP + PROPERTIES + COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") + endif() + endforeach() + endif() +endfunction() diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index b3107dbec8c79..592ba3b38ff30 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -131,7 +131,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") PUBLIC_INCLUDE_DIRECTORIES . 
SOURCES ${SRCS} ${SRCS_NO_H}) - target_compile_definitions(${targetName} PRIVATE $) target_compile_definitions(${targetName} PRIVATE GPUCA_BUILD_EVENT_DISPLAY_GLFW GPUCA_DISPLAY_GL3W GPUCA_DISPLAY_OPENGL_CORE) install(FILES ${HDRS} ${HDRS_INSTALL} DESTINATION include/GPU) @@ -158,6 +157,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") install(FILES ${CMAKE_CURRENT_BINARY_DIR}/setinclude.sh PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE DESTINATION displayTrackFilter) endif() +target_compile_definitions(${targetName} PRIVATE $) + message(STATUS "Building GPU Event Display (Vulkan ${GPUCA_EVENT_DISPLAY_VULKAN}, Wayland ${GPUCA_EVENT_DISPLAY_WAYLAND}, Freetype ${GPUCA_EVENT_DISPLAY_FREETYPE}, Fontconfig ${Fontconfig_FOUND}, Qt ${GPUCA_EVENT_DISPLAY_QT})") target_link_libraries(${targetName} PUBLIC ${GLFW_LIBRARIES} OpenGL::GL) target_include_directories(${targetName} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) From 8b6d22efaaccadcd2ca2d3c991ce692d5529cfe8 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 26 Mar 2025 10:17:13 +0100 Subject: [PATCH 0142/1764] Update CODEOWNERS (#14110) * Update CODEOWNERS Add @fprino to the ITS-related code owners. 
--- CODEOWNERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 3f6f4a9e42600..a22b122d0e6cd 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -34,7 +34,7 @@ /DataFormats/Detectors/GlobalTracking @shahor02 /DataFormats/Detectors/GlobalTrackingWorkflow @shahor02 /DataFormats/Detectors/HMPID @gvolpe79 -/DataFormats/Detectors/ITSMFT @mcoquet642 @mconcas @shahor02 +/DataFormats/Detectors/ITSMFT @fprino @mcoquet642 @mconcas @shahor02 /DataFormats/Detectors/MUON @AliceO2Group/muon-experts @shahor02 /DataFormats/Detectors/PHOS @peressounko @kharlov /DataFormats/Detectors/Passive @sawenzel @@ -65,7 +65,7 @@ /Detectors/GlobalTracking @shahor02 /Detectors/GlobalTrackingWorkflow @shahor02 /Detectors/HMPID @gvolpe79 -/Detectors/ITSMFT @mcoquet642 @mconcas @shahor02 +/Detectors/ITSMFT @fprino @mcoquet642 @mconcas @shahor02 /Detectors/MUON @AliceO2Group/muon-experts @shahor02 /Detectors/PHOS @peressounko @kharlov /Detectors/Passive @sawenzel From f26e72529547ff5eeca7889f6e0e6ccf1a9bf71d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 08:32:53 +0100 Subject: [PATCH 0143/1764] GPU: Cleanup unused template parameter --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 1 + GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h | 2 +- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu | 8 +++----- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index d96d5aad74622..9b6562d8e77ee 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -273,6 +273,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.createO2Output > 1) { mProcessingSettings.createO2Output = 1; } + mProcessingSettings.rtc.deterministic = 1; } if (mProcessingSettings.deterministicGPUReconstruction && 
mProcessingSettings.debugLevel >= 6) { mProcessingSettings.nTPCClustererLanes = 1; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 175fd205153ea..75ac0b5f18327 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -618,7 +618,7 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; #define GPUCA_KRNL(x_class, ...) \ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ + getRTCkernelNum(mInternals->kernelFunctions.size()); \ mInternals->kernelFunctions.emplace_back(new CUfunction); \ mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ if (mProcessingSettings.debugLevel >= 3) { \ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 30bbc76d4c415..9b80880b4fb3c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -49,7 +49,7 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase template class backendInternal; - template + template static int32_t getRTCkernelNum(int32_t k = -1); void getRTCKernelCalls(std::vector& kernels); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f1f459fe021bc..f0c9ba46c4a56 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -55,7 +55,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet #endif pArgs[arg_offset] = &y.index; GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); - GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, 
nullptr)); + GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -111,7 +111,7 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs +template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k) { static int32_t num = k; @@ -121,9 +121,7 @@ int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k) return num; } -#define GPUCA_KRNL(x_class, ...) \ - template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k); \ - template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k); +#define GPUCA_KRNL(x_class, ...) template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL From 95ae41ee037afd6c167e95a6bb98c1f92c389811 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 13:15:36 +0100 Subject: [PATCH 0144/1764] GPU: Unify creation of list of kernel names and kernel numbers --- .../Base/GPUReconstructionProcessing.cxx | 36 +++++++++++++++++++ .../Base/GPUReconstructionProcessing.h | 16 ++++----- .../Base/cuda/GPUReconstructionCUDA.cu | 8 ++--- .../Base/cuda/GPUReconstructionCUDA.h | 5 --- .../cuda/GPUReconstructionCUDAInternals.h | 1 - .../Base/cuda/GPUReconstructionCUDAKernels.cu | 16 +-------- .../Base/opencl/GPUReconstructionOCL.cxx | 2 +- .../Base/opencl/GPUReconstructionOCL.h | 2 -- .../opencl/GPUReconstructionOCLIncludesHost.h | 2 +- .../opencl/GPUReconstructionOCLKernels.cxx | 19 ++-------- 10 files changed, 51 insertions(+), 56 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 51c48ebbfc0b2..074c4faeb2926 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -124,3 +124,39 @@ std::unique_ptr GPUReconstructionProc 
gpu_reconstruction_kernels::threadContext::threadContext() = default; gpu_reconstruction_kernels::threadContext::~threadContext() = default; + +template +uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t k) +{ + static int32_t num = k; + if (num < 0) { + throw std::runtime_error("Internal Error - Kernel Number not Set"); + } + return num; +} + +namespace o2::gpu::internal +{ +static std::vector initKernelNames() +{ + std::vector retVal; +#define GPUCA_KRNL(x_class, ...) \ + GPUReconstructionProcessing::GetKernelNum(retVal.size()); \ + retVal.emplace_back(GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class))); +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL + return retVal; +} +} // namespace o2::gpu::internal + +const std::vector GPUReconstructionProcessing::mKernelNames = o2::gpu::internal::initKernelNames(); + +#define GPUCA_KRNL(x_class, ...) \ + template uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t); \ + template <> \ + const char* GPUReconstructionProcessing::GetKernelName() \ + { \ + return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 43560616782db..4e763f07b4396 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -74,7 +74,10 @@ class GPUReconstructionProcessing : public GPUReconstruction // Interface to query name of a kernel template - constexpr static const char* GetKernelName(); + static const char* GetKernelName(); + const std::string& GetKernelName(int32_t i) const { return mKernelNames[i]; } + template + static uint32_t GetKernelNum(int32_t k = -1); // Public queries for timers auto& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)]; } @@ -100,6 +103,8 @@ class GPUReconstructionProcessing : public GPUReconstruction 
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} using deviceEvent = gpu_reconstruction_kernels::deviceEvent; + static const std::vector mKernelNames; + int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels uint32_t mNActiveThreadsOuterLoop = 1; // Number of threads currently running an outer loop @@ -174,15 +179,6 @@ HighResTimer& GPUReconstructionProcessing::getTimer(const char* name, int32_t nu return timer->timer[num]; } -#define GPUCA_KRNL(x_class, ...) \ - template <> \ - constexpr const char* GPUReconstructionProcessing::GetKernelName() \ - { \ - return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL - } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 75ac0b5f18327..f87d5c8189cdc 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -610,7 +610,7 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() GPUChkErr(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); GPUChkErr(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); GPUChkErr(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); - GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", mInternals->kernelNames[i].c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem); + GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", GetKernelName(i).c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem); } } @@ -618,9 +618,10 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; #define GPUCA_KRNL(x_class, 
...) \ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ + if (GetKernelNum() != j) { \ + GPUFatal("kernel numbers out of sync"); \ + } \ mInternals->kernelFunctions.emplace_back(new CUfunction); \ - mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ if (mProcessingSettings.debugLevel >= 3) { \ GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ } \ @@ -628,7 +629,6 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) j++; #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL - if (j != mInternals->kernelModules.size()) { GPUFatal("Did not load all kernels (%u < %u)", j, (uint32_t)mInternals->kernelModules.size()); } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 9b80880b4fb3c..a98b14a873ca0 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -46,11 +46,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); template gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); - template - class backendInternal; - - template - static int32_t getRTCkernelNum(int32_t k = -1); void getRTCKernelCalls(std::vector& kernels); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index c85d98d85420e..f3fc21243ef0e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -30,7 +30,6 @@ namespace o2::gpu struct GPUReconstructionCUDAInternals { std::vector> kernelModules; // module for RTC compilation std::vector> kernelFunctions; // vector of ptrs to RTC kernels - std::vector kernelNames; // names of kernels cudaStream_t Streams[GPUCA_MAX_STREAMS]; // Pointer to array of CUDA Streams static void getArgPtrs(const void** pArgs) {} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f0c9ba46c4a56..4b3f8a767226c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -55,7 +55,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet #endif pArgs[arg_offset] = &y.index; GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); - GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); + GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[GetKernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -111,20 +111,6 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs -int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k) -{ - static int32_t num = k; - if (num < 0) { - throw 
std::runtime_error("Invalid kernel"); - } - return num; -} - -#define GPUCA_KRNL(x_class, ...) template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k); -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL - void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& kernels) { #define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__))); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e92205b9864e6..e724f0f2cbfcd 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -399,7 +399,7 @@ int32_t GPUReconstructionOCLBackend::ExitDevice_Runtime() clReleaseMemObject(mInternals->mem_gpu); clReleaseMemObject(mInternals->mem_constant); for (uint32_t i = 0; i < mInternals->kernels.size(); i++) { - clReleaseKernel(mInternals->kernels[i].first); + clReleaseKernel(mInternals->kernels[i]); } mInternals->kernels.clear(); } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 2abae229c74bb..29951cd43f167 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -56,8 +56,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase template int32_t AddKernel(); - template - uint32_t FindKernel(); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); template diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index 97316cf9aa32e..0bb2f25093789 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -49,7 +49,7 @@ struct GPUReconstructionOCLInternals { cl_mem mem_host; cl_program program; - std::vector> kernels; + std::vector kernels; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index ce6b6553ae1f7..fff69038c056f 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -58,20 +58,6 @@ void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs(args.s, vals...); }, args.v); } -template -inline uint32_t GPUReconstructionOCLBackend::FindKernel() -{ - std::string name(GetKernelName()); - - for (uint32_t k = 0; k < mInternals->kernels.size(); k++) { - if (mInternals->kernels[k].second == name) { - return (k); - } - } - GPUError("Could not find OpenCL kernel %s", name.c_str()); - throw ::std::runtime_error("Requested unsupported OpenCL kernel"); -} - template int32_t GPUReconstructionOCLBackend::AddKernel() { @@ -84,15 +70,14 @@ int32_t GPUReconstructionOCLBackend::AddKernel() GPUError("Error creating OPENCL Kernel: %s", name.c_str()); return 1; } - mInternals->kernels.emplace_back(krnl, name); + mInternals->kernels.emplace_back(krnl); return 0; } template S& GPUReconstructionOCLBackend::getKernelObject() { - static uint32_t krnl = FindKernel(); - return mInternals->kernels[krnl].first; + return mInternals->kernels[GetKernelNum()]; } int32_t GPUReconstructionOCLBackend::AddKernels() From 5f90f0cc2e80b9f801a72fce3f7c8ba68a27275a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 13:18:16 +0100 Subject: [PATCH 
0145/1764] GPU: Solve a todo to make the timer atomic flag a member variable --- .../Base/GPUReconstructionProcessing.cxx | 40 ++++++++----------- .../Base/GPUReconstructionProcessing.h | 3 ++ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 074c4faeb2926..bae95ac8d3f38 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -57,17 +57,24 @@ void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThr } } -namespace o2::gpu -{ -namespace // anonymous +uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) { -static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation -} // anonymous namespace -} // namespace o2::gpu + if (condition && mProcessingSettings.inKernelParallel != 1) { + mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? 
std::min(max, mMaxHostThreads) : mMaxHostThreads; + } else { + mNActiveThreadsOuterLoop = 1; + } + if (mProcessingSettings.debugLevel >= 5) { + printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); + } + return mNActiveThreadsOuterLoop; +} + +std::atomic_flag GPUReconstructionProcessing::mTimerFlag = ATOMIC_FLAG_INIT; GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step) { - while (timerFlag.test_and_set()) { + while (mTimerFlag.test_and_set()) { } if (mTimers.size() <= id) { mTimers.resize(id + 1); @@ -81,20 +88,20 @@ GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer mTimers[id]->count++; } timerMeta* retVal = mTimers[id].get(); - timerFlag.clear(); + mTimerFlag.clear(); return retVal; } GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::getTimerById(uint32_t id, bool increment) { timerMeta* retVal = nullptr; - while (timerFlag.test_and_set()) { + while (mTimerFlag.test_and_set()) { } if (mTimers.size() > id && mTimers[id]) { retVal = mTimers[id].get(); retVal->count += increment; } - timerFlag.clear(); + mTimerFlag.clear(); return retVal; } @@ -104,19 +111,6 @@ uint32_t GPUReconstructionProcessing::getNextTimerId() return id.fetch_add(1); } -uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) -{ - if (condition && mProcessingSettings.inKernelParallel != 1) { - mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? 
std::min(max, mMaxHostThreads) : mMaxHostThreads; - } else { - mNActiveThreadsOuterLoop = 1; - } - if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); - } - return mNActiveThreadsOuterLoop; -} - std::unique_ptr GPUReconstructionProcessing::GetThreadContext() { return std::make_unique(); diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 4e763f07b4396..b0466efceac24 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -20,6 +20,7 @@ #include "utils/timer.h" #include +#include namespace o2::gpu { @@ -135,6 +136,8 @@ class GPUReconstructionProcessing : public GPUReconstruction uint32_t getNextTimerId(); timerMeta* getTimerById(uint32_t id, bool increment = true); timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step); + + static std::atomic_flag mTimerFlag; }; template From fa403b41e99557774185f49657b0910bb2dbd327 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 08:33:25 +0100 Subject: [PATCH 0146/1764] GPU: Propagate list of noFastMathKernels to GPU RTC and apply special compile settings --- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 13 ++++-- GPU/GPUTracking/CMakeLists.txt | 4 ++ .../cmake/GPUNoFastMathKernels.template.h | 23 +++++++++++ GPU/GPUTracking/cmake/kernel_helpers.cmake | 40 +++++++++---------- 4 files changed, 57 insertions(+), 23 deletions(-) create mode 100644 GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index e789dc9b9ebc3..51d3bd4044e8d 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -31,11 +31,12 @@ 
QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_no_fast_math); +#include "GPUNoFastMathKernels.h" + int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + - std::string(mProcessingSettings.rtc.deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n") + GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; @@ -54,7 +55,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) std::string baseCommand = (mProcessingSettings.RTCprependCommand != "" ? (mProcessingSettings.RTCprependCommand + " ") : ""); baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len)); baseCommand += std::string(" ") + (mProcessingSettings.RTCoverrideArchitecture != "" ? mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); - baseCommand += mProcessingSettings.rtc.deterministic ? (std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len)) : std::string(""); char shasource[21], shaparam[21], shacmd[21], shakernels[21]; if (mProcessingSettings.rtc.cacheOutput) { @@ -169,13 +169,20 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) kernel += mProcessingSettings.rtc.compilePerKernel ? 
kernels[i] : kernelsall; kernel += "}"; - if (fwrite(rtcparam.c_str(), 1, rtcparam.size(), fp) != rtcparam.size() || + bool deterministic = mProcessingSettings.rtc.deterministic || o2::gpu::internal::noFastMathKernels.find(GetKernelName(i)) != o2::gpu::internal::noFastMathKernels.end(); + const std::string deterministicStr = std::string(deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n"); + + if (fwrite(deterministicStr.c_str(), 1, deterministicStr.size(), fp) != deterministicStr.size() || + fwrite(rtcparam.c_str(), 1, rtcparam.size(), fp) != rtcparam.size() || fwrite(_binary_GPUReconstructionCUDArtc_src_start, 1, _binary_GPUReconstructionCUDArtc_src_len, fp) != _binary_GPUReconstructionCUDArtc_src_len || fwrite(kernel.c_str(), 1, kernel.size(), fp) != kernel.size()) { throw std::runtime_error("Error writing file"); } fclose(fp); std::string command = baseCommand; + if (deterministic) { + command += std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len); + } command += " -c " + filename + "_" + std::to_string(i) + mRtcSrcExtension + " -o " + filename + "_" + std::to_string(i) + mRtcBinExtension; if (mProcessingSettings.debugLevel < 0) { command += " &> /dev/null"; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index ba2b9d05a3192..631f9f0edff4f 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -242,6 +242,10 @@ file(GENERATE OUTPUT include_gpu_onthefly/GPUReconstructionIncludesDeviceAll.h INPUT Base/GPUReconstructionIncludesDeviceAll.template.h ) +file(GENERATE + OUTPUT include_gpu_onthefly/GPUNoFastMathKernels.h + INPUT cmake/GPUNoFastMathKernels.template.h +) if(NOT ALIGPU_BUILD_TYPE STREQUAL "O2") include_directories(${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) endif() diff --git 
a/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h b/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h new file mode 100644 index 0000000000000..dac93277d5ec9 --- /dev/null +++ b/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h @@ -0,0 +1,23 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUNoFastMathKernels.h +/// \author David Rohr + +#include +#include + +namespace o2::gpu::internal +{ +// clang-format off +static const std::unordered_set noFastMathKernels = {$>,APPEND,">,PREPEND,">,$ >}; +// clang-format on +} // namespace o2::gpu::internal diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index e63b915640e8a..99699cc72e940 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -17,6 +17,7 @@ define_property(TARGET PROPERTY O2_GPU_KERNELS) define_property(TARGET PROPERTY O2_GPU_KERNEL_NAMES) define_property(TARGET PROPERTY O2_GPU_KERNEL_INCLUDES) define_property(TARGET PROPERTY O2_GPU_KERNEL_FILES) +define_property(TARGET PROPERTY O2_GPU_KERNEL_NO_FAST_MATH) set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/GPU/include_gpu_onthefly") file(MAKE_DIRECTORY ${O2_GPU_KERNEL_WRAPPER_FOLDER}) set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../") @@ -144,24 +145,23 @@ function(o2_gpu_kernel_file_list list) endfunction() function(o2_gpu_kernel_set_deterministic) - if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL 
${GPUCA_DETERMINISTIC_MODE_MAP_GPU}) - list(LENGTH ARGV n) - math(EXPR n "${n} - 1") - foreach(i RANGE 0 ${n}) - if(CUDA_ENABLED AND (NOT DEFINED GPUCA_CUDA_COMPILE_MODE OR GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")) - set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.cu" - TARGET_DIRECTORY O2::GPUTrackingCUDA - PROPERTIES - COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") - endif() - if(HIP_ENABLED AND (NOT DEFINED GPUCA_HIP_COMPILE_MODE OR GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")) - set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.hip" - TARGET_DIRECTORY O2::GPUTrackingHIP - PROPERTIES - COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") - endif() - endforeach() - endif() + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_NO_FAST_MATH "${ARGV${i}}") + if(CUDA_ENABLED AND (NOT DEFINED GPUCA_CUDA_COMPILE_MODE OR GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")) + set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.cu" + TARGET_DIRECTORY O2::GPUTrackingCUDA + PROPERTIES + COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") + endif() + if(HIP_ENABLED AND (NOT DEFINED GPUCA_HIP_COMPILE_MODE OR GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")) + set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.hip" + TARGET_DIRECTORY O2::GPUTrackingHIP + PROPERTIES + COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") + endif() + endforeach() endfunction() From 78d0f9c0d666f374acc8ed88efb383d75fdab388 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 14:11:58 +0100 Subject: [PATCH 0147/1764] GPU Compile Flags: Set denormals to zero unconditionally Before I kept them in NO_FAST_MATH 
mode, but this yields warning by nvcc, which for some stupid reason cannot be disabled. And in principle, with denormals globally disabled, it is also deterministic again, and disabling it everywhere is as simple as enabling them everywhere. --- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 2 +- GPU/GPUTracking/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Definitions/GPUDefGPUParameters.h | 12 ++++++------ dependencies/FindO2GPU.cmake | 12 +++++++++--- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 3da5b77f80d86..48f292a198b9c 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -27,7 +27,7 @@ set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021) if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() - set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt) + set(OCL_FLAGS ${OCL_FLAGS} ${GPUCA_OCL_NO_FAST_MATH_FLAGS}) endif() set(OCL_DEFINECL "-D$,$-D>" "-I$,EXCLUDE,^/usr/include/?>,$-I>" diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 631f9f0edff4f..ad7dd9c210cd1 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -17,10 +17,10 @@ set(MODULE GPUTracking) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2 ${GPUCA_CLANG_FTZ}") endif() 
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math ${GPUCA_CLANG_FTZ}") endif() include(cmake/helpers.cmake) diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 5b5a89cc8bc39..910907368e891 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -603,11 +603,11 @@ // #define GPUCA_KERNEL_DEBUGGER_OUTPUT // Some assertions to make sure out parameters are not invalid - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); - static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); - #ifdef GPUCA_GPUCODE - static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); - #endif +static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); +static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); +#ifdef GPUCA_GPUCODE + static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); +#endif // Derived parameters #ifdef GPUCA_USE_TEXTURES @@ -621,5 +621,5 @@ #define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) #define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT - // clang-format on +// clang-format on #endif diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 650a269209d9b..56b53e1be8879 100644 --- a/dependencies/FindO2GPU.cmake +++ 
b/dependencies/FindO2GPU.cmake @@ -84,8 +84,14 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$") set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}}) message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}") endif() -set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") -set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false") +if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + set(GPUCA_CLANG_FTZ "") +else() + set(GPUCA_CLANG_FTZ "-mdaz-ftz") +endif() +set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off ${GPUCA_CLANG_FTZ}") +set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=true --prec-div=true --prec-sqrt=true --fmad false") +set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt -cl-denorms-are-zero) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2}) add_definitions(-DGPUCA_DETERMINISTIC_MODE) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") @@ -172,7 +178,7 @@ if(ENABLE_CUDA) endif() endif() -# ---------------------------------- HIP ---------------------------------- +# ---------------------------------- OpenCL ---------------------------------- if(ENABLE_OPENCL) find_package(OpenCL) if(ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO") From 81dad271ef926e4af9809038b0aa32c0bcaf0f66 Mon Sep 17 00:00:00 2001 From: Andrea Sofia Triolo Date: Thu, 27 Mar 2025 08:37:46 +0100 Subject: [PATCH 0148/1764] ITS Efficiency study: modified cuts and code cleanup (#13995) * new method and checks * Rebinning of Z and Phi efficiency plots * ITS efficiency study: code cleanup * Please consider the following formatting changes * ITS efficiency study: raw pointers converted to smart pointers * Please consider the following formatting changes --------- Co-authored-by: Andrea Sofia Triolo Co-authored-by: 
ALICE Action Bot --- .../studies/include/ITSStudies/Efficiency.h | 10 +- .../postprocessing/studies/src/Efficiency.cxx | 1675 +++++++++-------- 2 files changed, 841 insertions(+), 844 deletions(-) diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/Efficiency.h b/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/Efficiency.h index 19df2279a2813..b6f43bb772390 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/Efficiency.h +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/Efficiency.h @@ -28,15 +28,8 @@ namespace study using mask_t = o2::dataformats::GlobalTrackID::mask_t; o2::framework::DataProcessorSpec getEfficiencyStudy(mask_t srcTracksMask, mask_t srcClustersMask, bool useMC, std::shared_ptr kineReader); -////// phi cuts for B=0 -float mPhiCutsL0[10][2] = {{-122.5, -122}, {-91.8, -91.7}, {-61, -60}, {-30.1, -29.8}, {30, 30.2}, {59, 59.5}, {88, 89}, {117, 118.5}, {147, 147.8}, {176.5, 176.6}}; -float mPhiCutsL1[12][2] = {{-137, -136.5}, {-114, -113.5}, {-91.5, -91}, {-68.5, -68}, {-45.6, -45.4}, {-23.1, -22.9}, {45.4, 45.6}, {67.4, 67.6}, {89.4, 89.6}, {110.4, 110.6}, {132.4, 132.6}, {154.4, 154.6}}; -float mPhiCutsL2[17][2] = {{-162.85, -162.65}, {-145, -144.5}, {-127, -126.5}, {-109, -108.5}, {-91, -90.5}, {-73, -72.5}, {-55.1, -54.9}, {-37.35, -37.15}, {-19.5, -19}, {36.8, 37}, {54.4, 54.6}, {71.9, 72.1}, {89, 89.5}, {106.4, 106.6}, {123.65, 123.85}, {141.4, 141.6}, {158.9, 159.1}}; - float mEtaCuts[2] = {-1.0, 1.0}; -// float mPtCuts[2] = {1, 4.5}; //// for B=5 float mPtCuts[2] = {0, 10}; /// no cut for B=0 -int mChi2cut = 100; // values obtained from the dca study for B=5 // float dcaXY[3] = {-0.000326, -0.000217, -0.000187}; @@ -55,6 +48,9 @@ int dcaCut = 8; float mDCACutsXY[3][2] = {{dcaXY[0] - dcaCut * sigmaDcaXY[0], dcaXY[0] + dcaCut* sigmaDcaXY[0]}, {dcaXY[1] - dcaCut * sigmaDcaXY[1], dcaXY[1] + dcaCut* sigmaDcaXY[1]}, {dcaXY[2] - dcaCut * sigmaDcaXY[2], dcaXY[2] + dcaCut* 
sigmaDcaXY[2]}}; // cuts at 8 sigma for each layer for xy. The values represent m-8sigma and m+8sigma float mDCACutsZ[3][2] = {{dcaZ[0] - dcaCut * sigmaDcaZ[0], dcaZ[0] + dcaCut* sigmaDcaZ[0]}, {dcaZ[1] - dcaCut * sigmaDcaZ[1], dcaZ[1] + dcaCut* sigmaDcaZ[1]}, {dcaZ[2] - dcaCut * sigmaDcaZ[2], dcaZ[2] + dcaCut* sigmaDcaZ[2]}}; +/// excluding bad chips in MC that are not present in data: to be checked based on the anchoring +std::vector mExcludedChipMC = {66, 67, 68, 75, 76, 77, 84, 85, 86, 93, 94, 95, 102, 103, 104, 265, 266, 267, 274, 275, 276, 283, 284, 285, 413, 414, 415, 422, 423, 424, 431, 432, 433}; + } // namespace study } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx index 28e09e5d9a3be..bca1ec1e85001 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #define NLAYERS 3 @@ -75,8 +77,6 @@ class EfficiencyStudy : public Task void studyClusterSelectionMC(); void countDuplicatedAfterCuts(); void getEfficiency(bool isMC); - void getEfficiencyAndTrackInfo(bool isMC); - void saveDataInfo(); void process(o2::globaltracking::RecoContainer&); void setClusterDictionary(const o2::itsmft::TopologyDictionary* d) { mDict = d; } @@ -105,20 +105,14 @@ class EfficiencyStudy : public Task // Data GTrackID::mask_t mTracksSrc{}; std::shared_ptr mDataRequest; - unsigned short mMask = 0x7f; // Utils std::shared_ptr mGGCCDBRequest; std::unique_ptr mOutFile; int mDuplicated_layer[NLAYERS] = {0}; - const o2::parameters::GRPMagField* mGRPMagField = nullptr; //// Histos - // Distance betweeen original and duplicated clusters - std::unique_ptr mDistanceClustersX[NLAYERS]; - std::unique_ptr mDistanceClustersY[NLAYERS]; - std::unique_ptr mDistanceClustersZ[NLAYERS]; - std::unique_ptr 
mDistanceClusters[NLAYERS]; + // DCA betweeen track and original cluster std::unique_ptr mDCAxyOriginal[NLAYERS]; std::unique_ptr mDCAzOriginal[NLAYERS]; @@ -132,16 +126,12 @@ class EfficiencyStudy : public Task // phi, eta, pt of the cluster std::unique_ptr mPhiOriginal[NLAYERS]; - std::unique_ptr mPhiTrackOriginal[NLAYERS]; std::unique_ptr mEtaOriginal[NLAYERS]; std::unique_ptr mPtOriginal[NLAYERS]; - TH1D* mPtDuplicated[NLAYERS]; - TH1D* mEtaDuplicated[NLAYERS]; - TH1D* mPhiDuplicated[NLAYERS]; - TH1D* mPhiTrackDuplicated[NLAYERS]; - TH2D* mPhiTrackDuplicatedvsphiDuplicated[NLAYERS]; - TH2D* mPhiTrackoriginalvsphioriginal[NLAYERS]; - TH1D* mPhiOriginalIfDuplicated[NLAYERS]; + std::unique_ptr mPtDuplicated[NLAYERS]; + std::unique_ptr mEtaDuplicated[NLAYERS]; + std::unique_ptr mPhiDuplicated[NLAYERS]; + std::unique_ptr mPhiOriginalIfDuplicated[NLAYERS]; std::unique_ptr mZvsPhiDUplicated[NLAYERS]; @@ -151,13 +141,6 @@ class EfficiencyStudy : public Task std::unique_ptr m2DClusterOriginalPositions; std::unique_ptr m2DClusterDuplicatedPositions; - std::unique_ptr mXoriginal; - std::unique_ptr mYoriginal; - std::unique_ptr mZoriginal; - std::unique_ptr mXduplicated; - std::unique_ptr mYduplicated; - std::unique_ptr mZduplicated; - // Efficiency histos std::unique_ptr mEfficiencyGoodMatch; std::unique_ptr mEfficiencyFakeMatch; @@ -165,29 +148,37 @@ class EfficiencyStudy : public Task std::unique_ptr mEfficiencyGoodMatch_layer[NLAYERS]; std::unique_ptr mEfficiencyFakeMatch_layer[NLAYERS]; std::unique_ptr mEfficiencyTotal_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchPt_layer[NLAYERS]; - TH2D* mEfficiencyFakeMatchPt_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchEta_layer[NLAYERS]; - TH2D* mEfficiencyFakeMatchEta_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchPhi_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchPhiTrack_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchPhiOriginal_layer[NLAYERS]; - TH2D* mEfficiencyFakeMatchPhi_layer[NLAYERS]; - TH2D* 
mEfficiencyFakeMatchPhiTrack_layer[NLAYERS]; + std::unique_ptr mEfficiencyGoodMatchPt_layer[NLAYERS]; + std::unique_ptr mEfficiencyFakeMatchPt_layer[NLAYERS]; + std::unique_ptr mEfficiencyGoodMatchEta_layer[NLAYERS]; + std::unique_ptr mEfficiencyFakeMatchEta_layer[NLAYERS]; + std::unique_ptr mEfficiencyGoodMatchPhi_layer[NLAYERS]; + std::unique_ptr mEfficiencyGoodMatchPhiOriginal_layer[NLAYERS]; + std::unique_ptr mEfficiencyFakeMatchPhi_layer[NLAYERS]; + + // std::unique_ptr mEfficiencyColEta[NLAYERS]; + std::unique_ptr mDenColEta[NLAYERS]; + std::unique_ptr mNumColEta[NLAYERS]; + std::unique_ptr mDenRowPhi[NLAYERS]; + std::unique_ptr mNumRowPhi[NLAYERS]; + std::unique_ptr mDenRowCol[NLAYERS]; + std::unique_ptr mNumRowCol[NLAYERS]; // phi, eta, pt of the duplicated cluster per layer - TH2D* mPt_EtaDupl[NLAYERS]; + std::unique_ptr mPt_EtaDupl[NLAYERS]; // duplicated per layer and per cut std::unique_ptr mDuplicatedEtaAllPt[NLAYERS]; std::unique_ptr mDuplicatedEta[NLAYERS][3]; std::unique_ptr mDuplicatedPhiAllPt[NLAYERS]; std::unique_ptr mDuplicatedPhi[NLAYERS][3]; - TH1D* mDuplicatedPt[NLAYERS]; - TH1D* mDuplicatedRow[NLAYERS]; - TH2D* mDuplicatedPtEta[NLAYERS]; - TH2D* mDuplicatedPtPhi[NLAYERS]; - TH2D* mDuplicatedEtaPhi[NLAYERS]; + std::unique_ptr mDuplicatedPt[NLAYERS]; + std::unique_ptr mDuplicatedRow[NLAYERS]; + std::unique_ptr mDuplicatedCol[NLAYERS]; + std::unique_ptr mDuplicatedZ[NLAYERS]; + std::unique_ptr mDuplicatedPtEta[NLAYERS]; + std::unique_ptr mDuplicatedPtPhi[NLAYERS]; + std::unique_ptr mDuplicatedEtaPhi[NLAYERS]; // matches per layer and per cut std::unique_ptr mNGoodMatchesEtaAllPt[NLAYERS]; @@ -200,26 +191,36 @@ class EfficiencyStudy : public Task std::unique_ptr mNFakeMatchesPhiAllPt[NLAYERS]; std::unique_ptr mNFakeMatchesPhi[NLAYERS][3]; - TH1D* mNGoodMatchesPt[NLAYERS]; - TH1D* mNFakeMatchesPt[NLAYERS]; + std::unique_ptr mNGoodMatchesPt[NLAYERS]; + std::unique_ptr mNFakeMatchesPt[NLAYERS]; + + std::unique_ptr mNGoodMatchesRow[NLAYERS]; + 
std::unique_ptr mNFakeMatchesRow[NLAYERS]; + + std::unique_ptr mNGoodMatchesCol[NLAYERS]; + std::unique_ptr mNFakeMatchesCol[NLAYERS]; - TH1D* mNGoodMatchesRow[NLAYERS]; - TH1D* mNFakeMatchesRow[NLAYERS]; + std::unique_ptr mNGoodMatchesZ[NLAYERS]; + std::unique_ptr mNFakeMatchesZ[NLAYERS]; - TH2D* mNGoodMatchesPtEta[NLAYERS]; - TH2D* mNFakeMatchesPtEta[NLAYERS]; + std::unique_ptr mNGoodMatchesPtEta[NLAYERS]; + std::unique_ptr mNFakeMatchesPtEta[NLAYERS]; - TH2D* mNGoodMatchesPtPhi[NLAYERS]; - TH2D* mNFakeMatchesPtPhi[NLAYERS]; + std::unique_ptr mNGoodMatchesPtPhi[NLAYERS]; + std::unique_ptr mNFakeMatchesPtPhi[NLAYERS]; - TH2D* mNGoodMatchesEtaPhi[NLAYERS]; - TH2D* mNFakeMatchesEtaPhi[NLAYERS]; + std::unique_ptr mNGoodMatchesEtaPhi[NLAYERS]; + std::unique_ptr mNFakeMatchesEtaPhi[NLAYERS]; // calculating the efficiency with TEfficiency class std::unique_ptr mEffPtGood[NLAYERS]; std::unique_ptr mEffPtFake[NLAYERS]; std::unique_ptr mEffRowGood[NLAYERS]; std::unique_ptr mEffRowFake[NLAYERS]; + std::unique_ptr mEffColGood[NLAYERS]; + std::unique_ptr mEffColFake[NLAYERS]; + std::unique_ptr mEffZGood[NLAYERS]; + std::unique_ptr mEffZFake[NLAYERS]; std::unique_ptr mEffPtEtaGood[NLAYERS]; std::unique_ptr mEffPtEtaFake[NLAYERS]; std::unique_ptr mEffPtPhiGood[NLAYERS]; @@ -237,17 +238,15 @@ class EfficiencyStudy : public Task std::unique_ptr mEffPhiFakeAllPt[NLAYERS]; std::unique_ptr mEffPhiFake[NLAYERS][3]; - TH2D* mnGoodMatchesPt_layer[NLAYERS]; - TH2D* mnFakeMatchesPt_layer[NLAYERS]; + std::unique_ptr mnGoodMatchesPt_layer[NLAYERS]; + std::unique_ptr mnFakeMatchesPt_layer[NLAYERS]; - TH2D* mnGoodMatchesEta_layer[NLAYERS]; - TH2D* mnFakeMatchesEta_layer[NLAYERS]; + std::unique_ptr mnGoodMatchesEta_layer[NLAYERS]; + std::unique_ptr mnFakeMatchesEta_layer[NLAYERS]; - TH2D* mnGoodMatchesPhi_layer[NLAYERS]; - TH2D* mnGoodMatchesPhiTrack_layer[NLAYERS]; - TH2D* mnGoodMatchesPhiOriginal_layer[NLAYERS]; - TH2D* mnFakeMatchesPhi_layer[NLAYERS]; - TH2D* 
mnFakeMatchesPhiTrack_layer[NLAYERS]; + std::unique_ptr mnGoodMatchesPhi_layer[NLAYERS]; + std::unique_ptr mnGoodMatchesPhiOriginal_layer[NLAYERS]; + std::unique_ptr mnFakeMatchesPhi_layer[NLAYERS]; std::unique_ptr DCAxyData[NLAYERS]; std::unique_ptr DCAzData[NLAYERS]; @@ -255,55 +254,77 @@ class EfficiencyStudy : public Task std::unique_ptr DCAxyRejected[NLAYERS]; std::unique_ptr DCAzRejected[NLAYERS]; - std::unique_ptr DistanceClustersX[NLAYERS]; - std::unique_ptr DistanceClustersY[NLAYERS]; - std::unique_ptr DistanceClustersZ[NLAYERS]; - std::unique_ptr DistanceClustersXAftercuts[NLAYERS]; - std::unique_ptr DistanceClustersYAftercuts[NLAYERS]; - std::unique_ptr DistanceClustersZAftercuts[NLAYERS]; - - TH1D* denPt[NLAYERS]; - TH1D* numPt[NLAYERS]; - TH1D* numPtGood[NLAYERS]; - TH1D* numPtFake[NLAYERS]; - - TH1D* denPhi[NLAYERS]; - TH1D* numPhi[NLAYERS]; - TH1D* numPhiGood[NLAYERS]; - TH1D* numPhiFake[NLAYERS]; - - TH1D* denEta[NLAYERS]; - TH1D* numEta[NLAYERS]; - TH1D* numEtaGood[NLAYERS]; - TH1D* numEtaFake[NLAYERS]; + std::unique_ptr denPt[NLAYERS]; + std::unique_ptr numPt[NLAYERS]; + std::unique_ptr numPtGood[NLAYERS]; + std::unique_ptr numPtFake[NLAYERS]; + + std::unique_ptr denPhi[NLAYERS]; + std::unique_ptr numPhi[NLAYERS]; + std::unique_ptr numPhiGood[NLAYERS]; + std::unique_ptr numPhiFake[NLAYERS]; + + std::unique_ptr denEta[NLAYERS]; + std::unique_ptr numEta[NLAYERS]; + std::unique_ptr numEtaGood[NLAYERS]; + std::unique_ptr numEtaFake[NLAYERS]; + + std::unique_ptr denRow[NLAYERS]; + std::unique_ptr numRow[NLAYERS]; + std::unique_ptr numRowGood[NLAYERS]; + std::unique_ptr numRowFake[NLAYERS]; + + std::unique_ptr denCol[NLAYERS]; + std::unique_ptr numCol[NLAYERS]; + std::unique_ptr numColGood[NLAYERS]; + std::unique_ptr numColFake[NLAYERS]; + std::unique_ptr denZ[NLAYERS]; + std::unique_ptr numZ[NLAYERS]; + std::unique_ptr numZGood[NLAYERS]; + std::unique_ptr numZFake[NLAYERS]; + + std::unique_ptr numLayers; + std::unique_ptr denLayers; + std::unique_ptr 
numGoodLayers; + std::unique_ptr numFakeLayers; int nDuplicatedClusters[NLAYERS] = {0}; int nTracksSelected[NLAYERS] = {0}; // denominator fot the efficiency calculation - TH2D* diffPhivsPt[NLAYERS]; - TH1D* diffTheta[NLAYERS]; - - TH1D* thetaOriginal[NLAYERS]; - TH1D* thetaOriginalCalc[NLAYERS]; - TH1D* thetaDuplicated[NLAYERS]; - TH1D* thetaOriginalCalcWhenDuplicated[NLAYERS]; - TH1D* thetaOriginalWhenDuplicated[NLAYERS]; - std::unique_ptr IPOriginalxy[NLAYERS]; std::unique_ptr IPOriginalz[NLAYERS]; - std::unique_ptr IPOriginalifDuplicatedxy[NLAYERS]; - std::unique_ptr IPOriginalifDuplicatedz[NLAYERS]; std::unique_ptr chipRowDuplicated[NLAYERS]; std::unique_ptr chipRowOriginalIfDuplicated[NLAYERS]; - std::unique_ptr chi2track; std::unique_ptr chi2trackAccepted; + + /// checking where the duplicated not found are (histograms filled with the orifinal cluster variables) + std::unique_ptr phiFound[NLAYERS]; + std::unique_ptr rowFound[NLAYERS]; + std::unique_ptr phiNotFound[NLAYERS]; + std::unique_ptr rowNotFound[NLAYERS]; + std::unique_ptr zFound[NLAYERS]; + std::unique_ptr zNotFound[NLAYERS]; + std::unique_ptr colFoundOriginalVsDuplicated[NLAYERS]; + std::unique_ptr colFoundOriginal[NLAYERS]; + std::unique_ptr colNotFound[NLAYERS]; + std::unique_ptr radiusFound[NLAYERS]; + std::unique_ptr radiusNotFound[NLAYERS]; + std::unique_ptr m2DClusterFoundPositions; + std::unique_ptr m2DClusterNotFoundPositions; + std::unique_ptr mChipNotFound; + std::unique_ptr mChipFound; + std::unique_ptr l0_00; + std::unique_ptr l1_15; + std::unique_ptr l2_19; + std::unique_ptr chipOrigVsOverlap; + std::unique_ptr chipmap; }; void EfficiencyStudy::init(InitContext& ic) { - LOGP(info, "--------------- init"); + LOGP(info, "init"); o2::base::GRPGeomHelper::instance().setRequest(mGGCCDBRequest); @@ -320,15 +341,8 @@ void EfficiencyStudy::init(InitContext& ic) mOutFile = std::make_unique(mOutFileName.c_str(), "recreate"); - mXoriginal = std::make_unique("xoriginal", "x original ;x (cm); ", 
200, 0, 0); - mYoriginal = std::make_unique("yoriginal", "y original ;y (cm); ", 200, 0, 0); - mZoriginal = std::make_unique("zoriginal", "z original ;z (cm); ", 300, 0, 0); - mXduplicated = std::make_unique("xduplicated", "x duplicated ;x (cm); ", 200, -10, 10); - mYduplicated = std::make_unique("yduplicated", "y duplicated ;y (cm); ", 200, -10, 10); - mZduplicated = std::make_unique("zduplicated", "z duplicated ;z (cm); ", 300, -30, 30); - - mDCAxyDuplicated = std::make_unique("dcaXYDuplicated", "Distance between track and duplicated cluster ;DCA xy (cm); ", 400, -0.2, 0.2); - mDCAzDuplicated = std::make_unique("dcaZDuplicated", "Distance between track and duplicated cluster ;DCA z (cm); ", 400, -0.2, 0.2); + mDCAxyDuplicated = std::make_unique("dcaXYDuplicated", "Distance between track and duplicated cluster ;DCA xy (cm); ", 200, -0.01, 0.01); + mDCAzDuplicated = std::make_unique("dcaZDuplicated", "Distance between track and duplicated cluster ;DCA z (cm); ", 200, -0.01, 0.01); m3DClusterPositions = std::make_unique("3DClusterPositions", ";x (cm);y (cm);z (cm)", 200, -10, 10, 200, -10, 10, 400, -20, 20); m3DDuplicatedClusterPositions = std::make_unique("3DDuplicatedClusterPositions", ";x (cm);y (cm);z (cm)", 200, -10, 10, 200, -10, 10, 500, -30, 30); @@ -339,152 +353,192 @@ void EfficiencyStudy::init(InitContext& ic) mEfficiencyFakeMatch = std::make_unique("mEfficiencyFakeMatch", ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); mEfficiencyTotal = std::make_unique("mEfficiencyTotal", ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); - chi2track = std::make_unique("chi2track", "; $chi^{2}", 500, 0, 100); - chi2trackAccepted = std::make_unique("chi2trackAccepted", "; $chi^{2}", 500, 0, 100); + chi2trackAccepted = std::make_unique("chi2trackAccepted", "; $#chi^{2}", 500, 0, 100); + + m2DClusterFoundPositions = std::make_unique("m2DClusterFoundPositions", ";x (cm);y (cm)", 250, -5, 5, 250, -5, 5); + m2DClusterNotFoundPositions = 
std::make_unique("m2DClusterNotFoundPositions", ";x (cm);y (cm)", 250, -5, 5, 250, -5, 5); + mChipNotFound = std::make_unique("mChipNotFound", ";chipID", 432, 0, 432); + mChipFound = std::make_unique("mChipFound", ";chipID", 432, 0, 432); + l0_00 = std::make_unique("l0_00", ";col; row", 2304, -0.5, 9215.5, 128, -0.5, 511.5); + l1_15 = std::make_unique("l1_15", ";col; row", 2304, -0.5, 9215.5, 512, -0.5, 511.5); + l2_19 = std::make_unique("l2_19", ";col; row", 2304, -0.5, 9215.5, 512, -0.5, 511.5); + chipOrigVsOverlap = std::make_unique("chipOrigVsOverlap", ";chipID Overlap;chipID Original", 9, 0, 9, 9, 0, 9); + chipmap = std::make_unique("chipmap", ";Column;Row", 1024, 0, 1023, 512, -0.5, 511.5); + + numLayers = std::make_unique("numLayers", "numLayers; ; Efficiency", 3, -0.5, 2.5); + numGoodLayers = std::make_unique("numGoodLayers", "numGoodLayers; ; Efficiency", 3, -0.5, 2.5); + numFakeLayers = std::make_unique("numFakeLayers", "numFakeLayers; ; Efficiency", 3, -0.5, 2.5); + denLayers = std::make_unique("denLayers", "denLayers; ; Efficiency", 3, -0.5, 2.5); for (int i = 0; i < NLAYERS; i++) { chipRowDuplicated[i] = std::make_unique(Form("chipPosDuplicated_L%d", i), Form("L%d; row", i), 512, -0.5, 511.5); chipRowOriginalIfDuplicated[i] = std::make_unique(Form("chipPosOriginalIfDuplicated%d", i), Form("L%d; row", i), 512, -0.5, 511.5); - DCAxyData[i] = std::make_unique(Form("dcaXYData_L%d", i), "Distance between track and original cluster ;DCA xy (cm); ", 4000, -2, 2); - DCAzData[i] = std::make_unique(Form("dcaZData_L%d", i), "Distance between track and original cluster ;DCA z (cm); ", 4000, -2, 2); + DCAxyData[i] = std::make_unique(Form("dcaXYData_L%d", i), "Distance between track and original cluster ;DCA xy (cm); ", 4000, -0.2, 0.2); + DCAzData[i] = std::make_unique(Form("dcaZData_L%d", i), "Distance between track and original cluster ;DCA z (cm); ", 4000, -0.2, 0.2); DCAxyRejected[i] = std::make_unique(Form("DCAxyRejected%d", i), "Distance between track and 
original cluster (rejected) ;DCA xy (cm); ", 30000, -30, 30); DCAzRejected[i] = std::make_unique(Form("DCAzRejected%d", i), "Distance between track and original cluster (rejected) ;DCA z (cm); ", 30000, -30, 30); - DistanceClustersX[i] = std::make_unique(Form("distanceClustersX_L%d", i), ";Distance x (cm); ", 100, 0, 1); - DistanceClustersY[i] = std::make_unique(Form("distanceClustersY_L%d", i), ";Distance y (cm); ", 100, 0, 1); - DistanceClustersZ[i] = std::make_unique(Form("distanceClustersZ_L%d", i), ";Distance z (cm); ", 100, 0, 1); - DistanceClustersXAftercuts[i] = std::make_unique(Form("distanceClustersXAftercuts_L%d", i), ";Distance x (cm); ", 100, 0, 1); - DistanceClustersYAftercuts[i] = std::make_unique(Form("distanceClustersYAftercuts_L%d", i), ";Distance y (cm); ", 100, 0, 1); - DistanceClustersZAftercuts[i] = std::make_unique(Form("distanceClustersZAftercuts_L%d", i), ";Distance z (cm); ", 100, 0, 1); + mDCAxyOriginal[i] = std::make_unique(Form("dcaXYOriginal_L%d", i), "Distance between track and original cluster ;DCA xy (cm); ", 200, -0.01, 0.01); + mDCAzOriginal[i] = std::make_unique(Form("dcaZOriginal_L%d", i), "Distance between track and original cluster ;DCA z (cm); ", 200, -0.01, 0.01); - mDistanceClustersX[i] = std::make_unique(Form("distanceClustersX_L%d", i), ";Distance x (cm); ", 100, 0, 1); - mDistanceClustersY[i] = std::make_unique(Form("distanceClustersY_L%d", i), ";Distance y (cm); ", 100, 0, 1); - mDistanceClustersZ[i] = std::make_unique(Form("distanceClustersZ_L%d", i), ";Distance z (cm); ", 100, 0, 1); - mDistanceClusters[i] = std::make_unique(Form("distanceClusters_L%d", i), ";Distance (cm); ", 100, 0, 1); - - mDCAxyOriginal[i] = std::make_unique(Form("dcaXYOriginal_L%d", i), "Distance between track and original cluster ;DCA xy (cm); ", 400, -0.2, 0.2); - mDCAzOriginal[i] = std::make_unique(Form("dcaZOriginal_L%d", i), "Distance between track and original cluster ;DCA z (cm); ", 400, -0.2, 0.2); - - mPhiOriginal[i] = 
std::make_unique(Form("phiOriginal_L%d", i), ";phi (deg); ", 1440, -180, 180); - mPhiTrackOriginal[i] = std::make_unique(Form("phiTrackOriginal_L%d", i), ";phi Track (deg); ", 1440, 0, 360); - mEtaOriginal[i] = std::make_unique(Form("etaOriginal_L%d", i), ";eta (deg); ", 100, -2, 2); + mPhiOriginal[i] = std::make_unique(Form("phiOriginal_L%d", i), ";phi (rad); ", 90, -3.2, 3.2); + mEtaOriginal[i] = std::make_unique(Form("etaOriginal_L%d", i), ";eta (rad); ", 100, -2, 2); mPtOriginal[i] = std::make_unique(Form("ptOriginal_L%d", i), ";pt (GeV/c); ", 100, 0, 10); - mZvsPhiDUplicated[i] = std::make_unique(Form("zvsphiDuplicated_L%d", i), ";z (cm);phi (deg)", 400, -20, 20, 1440, -180, 180); + mZvsPhiDUplicated[i] = std::make_unique(Form("zvsphiDuplicated_L%d", i), ";z (cm);phi (rad)", 400, -20, 20, 90, -3.2, 3.2); - mPtDuplicated[i] = new TH1D(Form("ptDuplicated_L%d", i), ";pt (GeV/c); ", nbPt, 0, 7.5); // xbins); - mEtaDuplicated[i] = new TH1D(Form("etaDuplicated_L%d", i), ";eta; ", 40, -2, 2); - mPhiDuplicated[i] = new TH1D(Form("phiDuplicated_L%d", i), ";phi (deg); ", 1440, -180, 180); - mPhiTrackDuplicated[i] = new TH1D(Form("phiTrackDuplicated_L%d", i), ";phi Track (deg); ", 1440, 0, 360); - mPhiOriginalIfDuplicated[i] = new TH1D(Form("phiOriginalIfDuplicated_L%d", i), ";phi (deg); ", 1440, -180, 180); - mPhiTrackDuplicatedvsphiDuplicated[i] = new TH2D(Form("phiTrackDuplicatedvsphiDuplicated_L%d", i), ";phi track (deg);phi oridinal if duplicated (deg); ", 1440, 0, 360, 1440, -180, 180); - mPhiTrackoriginalvsphioriginal[i] = new TH2D(Form("phiTrackoriginalvsphioriginal_L%d", i), ";phi track (deg);phi original (deg); ", 1440, 0, 360, 1440, -180, 180); - mDCAxyDuplicated_layer[i] = std::make_unique(Form("dcaXYDuplicated_layer_L%d", i), "Distance between track and duplicated cluster ;DCA xy (cm); ", 400, -0.2, 0.2); - mDCAzDuplicated_layer[i] = std::make_unique(Form("dcaZDuplicated_layer_L%d", i), "Distance between track and duplicated cluster ;DCA z (cm); ", 400, 
-0.2, 0.2); + mPtDuplicated[i] = std::make_unique(Form("ptDuplicated_L%d", i), ";pt (GeV/c); ", nbPt, 0, 7.5); // xbins); + mEtaDuplicated[i] = std::make_unique(Form("etaDuplicated_L%d", i), ";eta; ", 40, -2, 2); + mPhiDuplicated[i] = std::make_unique(Form("phiDuplicated_L%d", i), ";phi (rad); ", 90, -3.2, 3.2); + mPhiOriginalIfDuplicated[i] = std::make_unique(Form("phiOriginalIfDuplicated_L%d", i), ";phi (rad); ", 90, -3.2, 3.2); + mDCAxyDuplicated_layer[i] = std::make_unique(Form("dcaXYDuplicated_layer_L%d", i), "Distance between track and duplicated cluster ;DCA xy (cm); ", 100, -0.01, 0.01); + mDCAzDuplicated_layer[i] = std::make_unique(Form("dcaZDuplicated_layer_L%d", i), "Distance between track and duplicated cluster ;DCA z (cm); ", 100, -0.01, 0.01); mEfficiencyGoodMatch_layer[i] = std::make_unique(Form("mEfficiencyGoodMatch_layer_L%d", i), ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); mEfficiencyFakeMatch_layer[i] = std::make_unique(Form("mEfficiencyFakeMatch_layer_L%d", i), ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); mEfficiencyTotal_layer[i] = std::make_unique(Form("mEfficiencyTotal_layer_L%d", i), ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); - mEfficiencyGoodMatchPt_layer[i] = new TH2D(Form("mEfficiencyGoodMatchPt_layer_L%d", i), ";#it{p}_{T} (GeV/c);#sigma(DCA) cut;Efficiency;", nbPt, 0, 7.5, /* xbins*/ 20, 0.5, 20.5); - mEfficiencyFakeMatchPt_layer[i] = new TH2D(Form("mEfficiencyFakeMatchPt_layer_L%d", i), ";#it{p}_{T} (GeV/c);#sigma(DCA) cut;Efficiency;", nbPt, 0, 7.5, /* xbins*/ 20, 0.5, 20.5); + mEfficiencyGoodMatchPt_layer[i] = std::make_unique(Form("mEfficiencyGoodMatchPt_layer_L%d", i), ";#it{p}_{T} (GeV/c);#sigma(DCA) cut;Efficiency;", nbPt, 0, 7.5, /* xbins*/ 20, 0.5, 20.5); + mEfficiencyFakeMatchPt_layer[i] = std::make_unique(Form("mEfficiencyFakeMatchPt_layer_L%d", i), ";#it{p}_{T} (GeV/c);#sigma(DCA) cut;Efficiency;", nbPt, 0, 7.5, /* xbins*/ 20, 0.5, 20.5); - mEfficiencyGoodMatchEta_layer[i] = new 
TH2D(Form("mEfficiencyGoodMatchEta_layer_L%d", i), ";#eta;#sigma(DCA) cut;Efficiency;", 40, -2, 2, 20, 0.5, 20.5); - mEfficiencyFakeMatchEta_layer[i] = new TH2D(Form("mEfficiencyFakeMatchEta_layer_L%d", i), ";#eta;#sigma(DCA) cut;Efficiency;", 40, -2, 2, 20, 0.5, 20.5); + mEfficiencyGoodMatchEta_layer[i] = std::make_unique(Form("mEfficiencyGoodMatchEta_layer_L%d", i), ";#eta;#sigma(DCA) cut;Efficiency;", 40, -2, 2, 20, 0.5, 20.5); + mEfficiencyFakeMatchEta_layer[i] = std::make_unique(Form("mEfficiencyFakeMatchEta_layer_L%d", i), ";#eta;#sigma(DCA) cut;Efficiency;", 40, -2, 2, 20, 0.5, 20.5); - mEfficiencyGoodMatchPhi_layer[i] = new TH2D(Form("mEfficiencyGoodMatchPhi_layer_L%d", i), ";#phi;#sigma(DCA) cut;Efficiency;", 1440, -180, 180, 20, 0.5, 20.5); - mEfficiencyGoodMatchPhiTrack_layer[i] = new TH2D(Form("mEfficiencyGoodMatchPhiTrack_layer_L%d", i), ";#phi track;#sigma(DCA) cut;Efficiency;", 1440, 0, 360, 20, 0.5, 20.5); - mEfficiencyGoodMatchPhiOriginal_layer[i] = new TH2D(Form("mEfficiencyGoodMatchPhiOriginal_layer_L%d", i), ";#phi Original;#sigma(DCA) cut;Efficiency;", 1440, -180, 180, 20, 0.5, 20.5); - mEfficiencyFakeMatchPhi_layer[i] = new TH2D(Form("mEfficiencyFakeMatchPhi_layer_L%d", i), ";#phi;#sigma(DCA) cut;Efficiency;", 1440, -180, 180, 20, 0.5, 20.5); - mEfficiencyFakeMatchPhiTrack_layer[i] = new TH2D(Form("mEfficiencyFakeMatchPhiTrack_layer_L%d", i), ";#phi Track;#sigma(DCA) cut;Efficiency;", 1440, 0, 360, 20, 0.5, 20.5); + mEfficiencyGoodMatchPhi_layer[i] = std::make_unique(Form("mEfficiencyGoodMatchPhi_layer_L%d", i), ";#phi;#sigma(DCA) cut;Efficiency;", 90, -3.2, 3.2, 20, 0.5, 20.5); + mEfficiencyGoodMatchPhiOriginal_layer[i] = std::make_unique(Form("mEfficiencyGoodMatchPhiOriginal_layer_L%d", i), ";#phi Original;#sigma(DCA) cut;Efficiency;", 90, -3.2, 3.2, 20, 0.5, 20.5); + mEfficiencyFakeMatchPhi_layer[i] = std::make_unique(Form("mEfficiencyFakeMatchPhi_layer_L%d", i), ";#phi;#sigma(DCA) cut;Efficiency;", 90, -3.2, 3.2, 20, 0.5, 20.5); - 
mPt_EtaDupl[i] = new TH2D(Form("mPt_EtaDupl_L%d", i), ";#it{p}_{T} (GeV/c);#eta; ", 100, 0, 10, 100, -2, 2); + mPt_EtaDupl[i] = std::make_unique(Form("mPt_EtaDupl_L%d", i), ";#it{p}_{T} (GeV/c);#eta; ", 100, 0, 10, 100, -2, 2); - mDuplicatedPt[i] = new TH1D(Form("mDuplicatedPt_log_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/); + mDuplicatedPt[i] = std::make_unique(Form("mDuplicatedPt_log_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/); mDuplicatedPt[i]->Sumw2(); - mNGoodMatchesPt[i] = new TH1D(Form("mNGoodMatchesPt_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/); + mNGoodMatchesPt[i] = std::make_unique(Form("mNGoodMatchesPt_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/); mNGoodMatchesPt[i]->Sumw2(); - mNFakeMatchesPt[i] = new TH1D(Form("mNFakeMatchesPt_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of fake matches L%d", i), nbPt, 0, 7.5 /* xbins*/); + mNFakeMatchesPt[i] = std::make_unique(Form("mNFakeMatchesPt_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of fake matches L%d", i), nbPt, 0, 7.5 /* xbins*/); mNFakeMatchesPt[i]->Sumw2(); - mDuplicatedRow[i] = new TH1D(Form("mDuplicatedRow_L%d", i), Form("; Row; Number of duplciated clusters L%d", i), 512, -0.5, 511.5); + mDuplicatedRow[i] = std::make_unique(Form("mDuplicatedRow_L%d", i), Form("; Row; Number of duplciated clusters L%d", i), 128, -0.5, 511.5); mDuplicatedRow[i]->Sumw2(); - mNGoodMatchesRow[i] = new TH1D(Form("mNGoodMatchesRow_L%d", i), Form("; Row; Number of good matches L%d", i), 512, -0.5, 511.5); + mNGoodMatchesRow[i] = std::make_unique(Form("mNGoodMatchesRow_L%d", i), Form("; Row; Number of good matches L%d", i), 128, -0.5, 511.5); mNGoodMatchesRow[i]->Sumw2(); - mNFakeMatchesRow[i] = new TH1D(Form("mNFakeMatchesRow_L%d", i), Form(";Row; Number of fake matches L%d", i), 512, -0.5, 511.5); + 
mNFakeMatchesRow[i] = std::make_unique(Form("mNFakeMatchesRow_L%d", i), Form(";Row; Number of fake matches L%d", i), 128, -0.5, 511.5); mNFakeMatchesRow[i]->Sumw2(); - mDuplicatedPtEta[i] = new TH2D(Form("mDuplicatedPtEta_log_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); + mDuplicatedCol[i] = std::make_unique(Form("mDuplicatedCol_L%d", i), Form("; Col; Number of duplciated clusters L%d", i), 128, -0.5, 1023.5); + mDuplicatedCol[i]->Sumw2(); + mNGoodMatchesCol[i] = std::make_unique(Form("mNGoodMatchesCol_L%d", i), Form("; Col; Number of good matches L%d", i), 128, -0.5, 1023.5); + mNGoodMatchesCol[i]->Sumw2(); + mNFakeMatchesCol[i] = std::make_unique(Form("mNFakeMatchesCol_L%d", i), Form(";Col; Number of fake matches L%d", i), 128, -0.5, 1023.5); + mNFakeMatchesCol[i]->Sumw2(); + + mDuplicatedZ[i] = std::make_unique(Form("mDuplicatedZ_L%d", i), Form("; Z (cm); Number of duplciated clusters L%d", i), 100, -15, 15); + mDuplicatedZ[i]->Sumw2(); + mNGoodMatchesZ[i] = std::make_unique(Form("mNGoodMatchesZ_L%d", i), Form("; Z (cm); Number of good matches L%d", i), 100, -15, 15); + mNGoodMatchesZ[i]->Sumw2(); + mNFakeMatchesZ[i] = std::make_unique(Form("mNFakeMatchesZ_L%d", i), Form(";Z (cm); Number of fake matches L%d", i), 100, -15, 15); + mNFakeMatchesZ[i]->Sumw2(); + + mDuplicatedPtEta[i] = std::make_unique(Form("mDuplicatedPtEta_log_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); mDuplicatedPtEta[i]->Sumw2(); - mNGoodMatchesPtEta[i] = new TH2D(Form("mNGoodMatchesPtEta_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); + mNGoodMatchesPtEta[i] = std::make_unique(Form("mNGoodMatchesPtEta_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); mNGoodMatchesPtEta[i]->Sumw2(); - mNFakeMatchesPtEta[i] = new 
TH2D(Form("mNFakeMatchesPtEta_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); + mNFakeMatchesPtEta[i] = std::make_unique(Form("mNFakeMatchesPtEta_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); mNFakeMatchesPtEta[i]->Sumw2(); - mDuplicatedPtPhi[i] = new TH2D(Form("mDuplicatedPtPhi_log_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (deg); Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/, 1440, -180, 180); + mDuplicatedPtPhi[i] = std::make_unique(Form("mDuplicatedPtPhi_log_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (rad); Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/, 90, -3.2, 3.2); mDuplicatedPtPhi[i]->Sumw2(); - mNGoodMatchesPtPhi[i] = new TH2D(Form("mNGoodMatchesPtPhi_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (deg); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 1440, -180, 180); + mNGoodMatchesPtPhi[i] = std::make_unique(Form("mNGoodMatchesPtPhi_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (rad); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 90, -3.2, 3.2); mNGoodMatchesPtPhi[i]->Sumw2(); - mNFakeMatchesPtPhi[i] = new TH2D(Form("mNFakeMatchesPtPhi_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (deg); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 1440, -180, 180); + mNFakeMatchesPtPhi[i] = std::make_unique(Form("mNFakeMatchesPtPhi_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (rad); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 90, -3.2, 3.2); mNFakeMatchesPtPhi[i]->Sumw2(); - mDuplicatedEtaPhi[i] = new TH2D(Form("mDuplicatedEtaPhi_L%d", i), Form("; #eta;#phi (deg); Number of duplciated clusters L%d", i), 40, -2, 2, 1440, -180, 180); + mDuplicatedEtaPhi[i] = std::make_unique(Form("mDuplicatedEtaPhi_L%d", i), Form("; #eta;#phi (rad); Number of duplciated clusters L%d", i), 40, -2, 2, 90, -3.2, 3.2); mDuplicatedEtaPhi[i]->Sumw2(); - mNGoodMatchesEtaPhi[i] = new 
TH2D(Form("mNGoodMatchesEtaPhi_L%d", i), Form("; #eta;#phi (deg); Number of good matches L%d", i), 40, -2, 2, 1440, -180, 180); + mNGoodMatchesEtaPhi[i] = std::make_unique(Form("mNGoodMatchesEtaPhi_L%d", i), Form("; #eta;#phi (rad); Number of good matches L%d", i), 40, -2, 2, 90, -3.2, 3.2); mNGoodMatchesEtaPhi[i]->Sumw2(); - mNFakeMatchesEtaPhi[i] = new TH2D(Form("mNFakeMatchesEtaPhi_L%d", i), Form("; #eta;#phi (deg); Number of good matches L%d", i), 40, -2, 2, 1440, -180, 180); + mNFakeMatchesEtaPhi[i] = std::make_unique(Form("mNFakeMatchesEtaPhi_L%d", i), Form("; #eta;#phi (rad); Number of good matches L%d", i), 40, -2, 2, 90, -3.2, 3.2); mNFakeMatchesEtaPhi[i]->Sumw2(); mDuplicatedEtaAllPt[i] = std::make_unique(Form("mDuplicatedEtaAllPt_L%d", i), Form("; #eta; Number of duplicated clusters L%d", i), 40, -2, 2); mNGoodMatchesEtaAllPt[i] = std::make_unique(Form("mNGoodMatchesEtaAllPt_L%d", i), Form("; #eta; Number of good matches L%d", i), 40, -2, 2); mNFakeMatchesEtaAllPt[i] = std::make_unique(Form("mNFakeMatchesEtaAllPt_L%d", i), Form("; #eta; Number of fake matches L%d", i), 40, -2, 2); - mDuplicatedPhiAllPt[i] = std::make_unique(Form("mDuplicatedPhiAllPt_L%d", i), Form("; #phi (deg); Number of duplicated clusters L%d", i), 1440, -180, 180); - mNGoodMatchesPhiAllPt[i] = std::make_unique(Form("mNGoodMatchesPhiAllPt_L%d", i), Form("; #phi (deg); Number of good matches L%d", i), 1440, -180, 180); - mNFakeMatchesPhiAllPt[i] = std::make_unique(Form("mNFakeMatchesPhiAllPt_L%d", i), Form("; #phi (deg); Number of fake matches L%d", i), 1440, -180, 180); - - mnGoodMatchesPt_layer[i] = new TH2D(Form("mnGoodMatchesPt_layer_L%d", i), ";pt; nGoodMatches", nbPt, 0, 7.5 /* xbins*/, 20, 0.5, 20.5); - mnFakeMatchesPt_layer[i] = new TH2D(Form("mnFakeMatchesPt_layer_L%d", i), ";pt; nFakeMatches", nbPt, 0, 7.5 /* xbins*/, 20, 0.5, 20.5); - mnGoodMatchesEta_layer[i] = new TH2D(Form("mnGoodMatchesEta_layer_L%d", i), ";#eta; nGoodMatches", 40, -2, 2, 20, 0.5, 20.5); - 
mnFakeMatchesEta_layer[i] = new TH2D(Form("mnFakeMatchesEta_layer_L%d", i), ";#eta; nFakeMatches", 40, -2, 2, 20, 0.5, 20.5); - mnGoodMatchesPhi_layer[i] = new TH2D(Form("mnGoodMatchesPhi_layer_L%d", i), ";#Phi; nGoodMatches", 1440, -180, 180, 20, 0.5, 20.5); - mnGoodMatchesPhiTrack_layer[i] = new TH2D(Form("mnGoodMatchesPhiTrack_layer_L%d", i), ";#Phi track; nGoodMatches", 1440, 0, 360, 20, 0.5, 20.5); - mnGoodMatchesPhiOriginal_layer[i] = new TH2D(Form("mnGoodMatchesPhiOriginal_layer_L%d", i), ";#Phi of the original Cluster; nGoodMatches", 1440, -180, 180, 20, 0.5, 20.5); - mnFakeMatchesPhi_layer[i] = new TH2D(Form("mnFakeMatchesPhi_layer_L%d", i), ";#Phi; nFakeMatches", 1440, -180, 180, 20, 0.5, 20.5); - mnFakeMatchesPhiTrack_layer[i] = new TH2D(Form("mnFakeMatchesPhiTrack_layer_L%d", i), ";#Phi track; nFakeMatches", 1440, 0, 360, 20, 0.5, 20.5); - - denPt[i] = new TH1D(Form("denPt_L%d", i), Form("denPt_L%d", i), nbPt, 0, 7.5 /* xbins*/); - numPt[i] = new TH1D(Form("numPt_L%d", i), Form("numPt_L%d", i), nbPt, 0, 7.5 /* xbins*/); - numPtGood[i] = new TH1D(Form("numPtGood_L%d", i), Form("numPtGood_L%d", i), nbPt, 0, 7.5 /* xbins*/); - numPtFake[i] = new TH1D(Form("numPtFake_L%d", i), Form("numPtFake_L%d", i), nbPt, 0, 7.5 /* xbins*/); - - denPhi[i] = new TH1D(Form("denPhi_L%d", i), Form("denPhi_L%d", i), 1440, -180, 180); - numPhi[i] = new TH1D(Form("numPhi_L%d", i), Form("numPhi_L%d", i), 1440, -180, 180); - numPhiGood[i] = new TH1D(Form("numPhiGood_L%d", i), Form("numPhiGood_L%d", i), 1440, -180, 180); - numPhiFake[i] = new TH1D(Form("numPhiFake_L%d", i), Form("numPhiFake_L%d", i), 1440, -180, 180); - - denEta[i] = new TH1D(Form("denEta_L%d", i), Form("denEta_L%d", i), 200, -2, 2); - numEta[i] = new TH1D(Form("numEta_L%d", i), Form("numEta_L%d", i), 200, -2, 2); - numEtaGood[i] = new TH1D(Form("numEtaGood_L%d", i), Form("numEtaGood_L%d", i), 200, -2, 2); - numEtaFake[i] = new TH1D(Form("numEtaFake_L%d", i), Form("numEtaFake_L%d", i), 200, -2, 2); - - 
diffPhivsPt[i] = new TH2D(Form("diffPhivsPt_L%d", i), Form("diffPhivsPt_L%d", i), nbPt, 0, 7.5 /* xbins*/, 50, 0, 5); + mDuplicatedPhiAllPt[i] = std::make_unique(Form("mDuplicatedPhiAllPt_L%d", i), Form("; #phi (rad); Number of duplicated clusters L%d", i), 90, -3.2, 3.2); + mNGoodMatchesPhiAllPt[i] = std::make_unique(Form("mNGoodMatchesPhiAllPt_L%d", i), Form("; #phi (rad); Number of good matches L%d", i), 90, -3.2, 3.2); + mNFakeMatchesPhiAllPt[i] = std::make_unique(Form("mNFakeMatchesPhiAllPt_L%d", i), Form("; #phi (rad); Number of fake matches L%d", i), 90, -3.2, 3.2); + + mnGoodMatchesPt_layer[i] = std::make_unique(Form("mnGoodMatchesPt_layer_L%d", i), ";pt; nGoodMatches", nbPt, 0, 7.5 /* xbins*/, 20, 0.5, 20.5); + mnFakeMatchesPt_layer[i] = std::make_unique(Form("mnFakeMatchesPt_layer_L%d", i), ";pt; nFakeMatches", nbPt, 0, 7.5 /* xbins*/, 20, 0.5, 20.5); + mnGoodMatchesEta_layer[i] = std::make_unique(Form("mnGoodMatchesEta_layer_L%d", i), ";#eta; nGoodMatches", 40, -2, 2, 20, 0.5, 20.5); + mnFakeMatchesEta_layer[i] = std::make_unique(Form("mnFakeMatchesEta_layer_L%d", i), ";#eta; nFakeMatches", 40, -2, 2, 20, 0.5, 20.5); + mnGoodMatchesPhi_layer[i] = std::make_unique(Form("mnGoodMatchesPhi_layer_L%d", i), ";#Phi; nGoodMatches", 90, -3.2, 3.2, 20, 0.5, 20.5); + mnGoodMatchesPhiOriginal_layer[i] = std::make_unique(Form("mnGoodMatchesPhiOriginal_layer_L%d", i), ";#Phi of the original Cluster; nGoodMatches", 90, -3.2, 3.2, 20, 0.5, 20.5); + mnFakeMatchesPhi_layer[i] = std::make_unique(Form("mnFakeMatchesPhi_layer_L%d", i), ";#Phi; nFakeMatches", 90, -3.2, 3.2, 20, 0.5, 20.5); + + denPt[i] = std::make_unique(Form("denPt_L%d", i), Form("denPt_L%d", i), nbPt, 0, 7.5 /* xbins*/); + numPt[i] = std::make_unique(Form("numPt_L%d", i), Form("numPt_L%d", i), nbPt, 0, 7.5 /* xbins*/); + numPtGood[i] = std::make_unique(Form("numPtGood_L%d", i), Form("numPtGood_L%d", i), nbPt, 0, 7.5 /* xbins*/); + numPtFake[i] = std::make_unique(Form("numPtFake_L%d", i), 
Form("numPtFake_L%d", i), nbPt, 0, 7.5 /* xbins*/); + + denPhi[i] = std::make_unique(Form("denPhi_L%d", i), Form("denPhi_L%d", i), 90, -3.2, 3.2); + numPhi[i] = std::make_unique(Form("numPhi_L%d", i), Form("numPhi_L%d", i), 90, -3.2, 3.2); + numPhiGood[i] = std::make_unique(Form("numPhiGood_L%d", i), Form("numPhiGood_L%d", i), 90, -3.2, 3.2); + numPhiFake[i] = std::make_unique(Form("numPhiFake_L%d", i), Form("numPhiFake_L%d", i), 90, -3.2, 3.2); + + denEta[i] = std::make_unique(Form("denEta_L%d", i), Form("denEta_L%d", i), 200, -2, 2); + numEta[i] = std::make_unique(Form("numEta_L%d", i), Form("numEta_L%d", i), 200, -2, 2); + numEtaGood[i] = std::make_unique(Form("numEtaGood_L%d", i), Form("numEtaGood_L%d", i), 200, -2, 2); + numEtaFake[i] = std::make_unique(Form("numEtaFake_L%d", i), Form("numEtaFake_L%d", i), 200, -2, 2); + + denRow[i] = std::make_unique(Form("denRow_L%d", i), Form("denRow_L%d", i), 128, -0.5, 511.5); + numRow[i] = std::make_unique(Form("numRow_L%d", i), Form("numRow_L%d", i), 128, -0.5, 511.5); + numRowGood[i] = std::make_unique(Form("numRowGood_L%d", i), Form("numRowGood_L%d", i), 128, -0.5, 511.5); + numRowFake[i] = std::make_unique(Form("numRowFake_L%d", i), Form("numRowFake_L%d", i), 128, -0.5, 511.5); + + denCol[i] = std::make_unique(Form("denCol_L%d", i), Form("denCol_L%d", i), 128, -0.5, 1023.5); + numCol[i] = std::make_unique(Form("numCol_L%d", i), Form("numCol_L%d", i), 128, -0.5, 1023.5); + numColGood[i] = std::make_unique(Form("numColGood_L%d", i), Form("numColGood_L%d", i), 128, -0.5, 1023.5); + numColFake[i] = std::make_unique(Form("numColFake_L%d", i), Form("numColFake_L%d", i), 128, -0.5, 1023.5); + + denZ[i] = std::make_unique(Form("denZ_L%d", i), Form("denZ_L%d", i), 100, -15, 15); + numZ[i] = std::make_unique(Form("numZ_L%d", i), Form("numZ_L%d", i), 100, -15, 15); + numZGood[i] = std::make_unique(Form("numZGood_L%d", i), Form("numZGood_L%d", i), 100, -15, 15); + numZFake[i] = std::make_unique(Form("numZFake_L%d", i), 
Form("numZFake_L%d", i), 100, -15, 15); + + mDenColEta[i] = std::make_unique(Form("mDenColEta_L%d", i), Form("mDenColEta_L%d", i), 128, -0.5, 1023.5, 50, -1, 1); + mNumColEta[i] = std::make_unique(Form("mNumColEta_L%d", i), Form("mNumColEta_L%d", i), 128, -0.5, 1023.5, 50, -1, 1); + + mDenRowPhi[i] = std::make_unique(Form("mDenRowPhi_L%d", i), Form("mDenRowPhi_L%d", i), 128, -0.5, 511.5, 90, -3.2, 3.2); + mNumRowPhi[i] = std::make_unique(Form("mNumRowPhi_L%d", i), Form("mNumRowPhi_L%d", i), 128, -0.5, 511.5, 90, -3.2, 3.2); + + mDenRowCol[i] = std::make_unique(Form("mDenRowCol_L%d", i), Form("mDenRowCol_L%d", i), 128, -0.5, 511.5, 128, -0.5, 1023.5); + mNumRowCol[i] = std::make_unique(Form("mNumRowCol_L%d", i), Form("mNumRowCol_L%d", i), 128, -0.5, 511.5, 128, -0.5, 1023.5); IPOriginalxy[i] = std::make_unique(Form("IPOriginalxy_L%d", i), Form("IPOriginalxy_L%d", i), 500, -0.002, 0.002); IPOriginalz[i] = std::make_unique(Form("IPOriginalz_L%d", i), Form("IPOriginalz_L%d", i), 200, -10, 10); - IPOriginalifDuplicatedxy[i] = std::make_unique(Form("IPOriginalifDuplicatedxy_L%d", i), Form("IPOriginalifDuplicatedxy_L%d", i), 1000, -0.005, 0.005); - IPOriginalifDuplicatedz[i] = std::make_unique(Form("IPOriginalifDuplicatedz_L%d", i), Form("IPOriginalifDuplicatedz_L%d", i), 200, -10, 10); + + phiFound[i] = std::make_unique(Form("phiFound_L%d", i), Form("phiFound_L%d", i), 190, -3.2, 3.2); + rowFound[i] = std::make_unique(Form("rowFound_L%d", i), Form("rowFound_L%d", i), 128, -0.5, 511.5); + phiNotFound[i] = std::make_unique(Form("phiNotFound_L%d", i), Form("phiNotFound_L%d", i), 90, -3.2, 3.2); + rowNotFound[i] = std::make_unique(Form("rowNotFound_L%d", i), Form("rowNotFound_L%d", i), 128, -0.5, 511.5); + zFound[i] = std::make_unique(Form("zFound_L%d", i), Form("zFound_L%d", i), 100, -15, 15); + zNotFound[i] = std::make_unique(Form("zNotFound%d", i), Form("zNotFound%d", i), 100, -15, 15); + colFoundOriginalVsDuplicated[i] = 
std::make_unique(Form("colFoundOriginalVsDuplicated_L%d", i), Form("colFoundOriginalVsDuplicated_L%d; Col Original cluster; Col Overlap cluster", i), 9216, -0.5, 9215.5, 9216, -0.5, 9215.5); + colFoundOriginal[i] = std::make_unique(Form("colFoundOriginal_L%d", i), Form("colFoundOriginal_L%d; Col Original cluster;", i), 9216, -0.5, 9215.5); + colNotFound[i] = std::make_unique(Form("colNotFound_L%d", i), Form("colNotFound_L%d", i), 9216, -0.5, 9215.5); + radiusFound[i] = std::make_unique(Form("radiusFound_L%d", i), Form("radiusFound_L%d", i), 80, 0, 6); + radiusNotFound[i] = std::make_unique(Form("radiusNotFound_L%d", i), Form("radiusNotFound_L%d", i), 80, 0, 4); for (int j = 0; j < 3; j++) { mDuplicatedEta[i][j] = std::make_unique(Form("mDuplicatedEta_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #eta; Number of duplicated clusters L%d", mrangesPt[j][0], mrangesPt[j][1], i), 40, -2, 2); mNGoodMatchesEta[i][j] = std::make_unique(Form("mNGoodMatchesEta_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #eta; Number of good matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 40, -2, 2); mNFakeMatchesEta[i][j] = std::make_unique(Form("mNFakeMatchesEta_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #eta; Number of fake matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 40, -2, 2); - mDuplicatedPhi[i][j] = std::make_unique(Form("mDuplicatedPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of duplicated clusters L%d", mrangesPt[j][0], mrangesPt[j][1], i), 1440, -180, 180); - mNGoodMatchesPhi[i][j] = std::make_unique(Form("mNGoodMatchesPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of good matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 1440, -180, 180); - mNFakeMatchesPhi[i][j] = std::make_unique(Form("mNFakeMatchesPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of fake matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 1440, -180, 180); + mDuplicatedPhi[i][j] = 
std::make_unique(Form("mDuplicatedPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of duplicated clusters L%d", mrangesPt[j][0], mrangesPt[j][1], i), 90, -3.2, 3.2); + mNGoodMatchesPhi[i][j] = std::make_unique(Form("mNGoodMatchesPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of good matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 90, -3.2, 3.2); + mNFakeMatchesPhi[i][j] = std::make_unique(Form("mNFakeMatchesPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of fake matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 90, -3.2, 3.2); } } gStyle->SetPalette(55); @@ -508,7 +562,6 @@ void EfficiencyStudy::initialiseRun(o2::globaltracking::RecoContainer& recoData) mTracksMCLabels = recoData.getITSTracksMCLabels(); mClustersMCLCont = recoData.getITSClustersMCLabels(); } - mITSClustersArray.clear(); mTracksROFRecords = recoData.getITSTracksROFRecords(); mTracks = recoData.getITSTracks(); @@ -574,7 +627,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) continue; } - float ip[2]; + float ip[2]; // IP from 0,0,0 and the track should be the deplacement of the primary vertex track.getImpactParams(0, 0, 0, 0, ip); // if (abs(ip[0])>0.001 ) continue; ///pv not in (0,0,0) @@ -587,14 +640,12 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) auto pt = trackParCov.getPt(); auto eta = trackParCov.getEta(); - float phiTrack = trackParCov.getPhi() * 180 / M_PI; - - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { - continue; - } + // if (pt < mPtCuts[0] || pt > mPtCuts[1]) { + // continue; + // } + // if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { + // continue; + // } float phioriginal = 0; float phiduplicated = 0; @@ -617,8 +668,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; 
o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - phioriginal = clusOriginalPointGlob.phi() * 180 / M_PI; - mPhiTrackoriginalvsphioriginal[layer]->Fill(phiTrack, phioriginal); + phioriginal = clusOriginalPointGlob.phi(); // * 180 / M_PI; mPhiOriginal[layer]->Fill(phioriginal); mPtOriginal[layer]->Fill(pt); @@ -645,7 +695,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; // phiduplicated = std::atan2(clusDuplicatedPointGlob.y(), clusDuplicatedPointGlob.x()) * 180 / M_PI + 180; - phiduplicated = clusDuplicatedPointGlob.phi() * 180 / M_PI; + phiduplicated = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; auto labsClus = mClustersMCLCont->getLabels(iClus); // ideally I can have more than one label per cluster for (auto labC : labsClus) { @@ -686,8 +736,6 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) mEtaDuplicated[layerClus]->Fill(eta); mPhiDuplicated[layerClus]->Fill(phiduplicated); mZvsPhiDUplicated[layerClus]->Fill(clusDuplicatedPointGlob.Z(), phiduplicated); - mPhiTrackDuplicated[layerClus]->Fill(phiTrack); - mPhiTrackDuplicatedvsphiDuplicated[layerClus]->Fill(phiTrack, phioriginal); mPhiOriginalIfDuplicated[layerClus]->Fill(phioriginal); } @@ -700,6 +748,8 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) } UShort_t rowDuplicated = clusDuplicated.getRow(); mDuplicatedRow[layerDuplicated]->Fill(rowOriginal); + mDuplicatedCol[layerDuplicated]->Fill(clusOriginal.getCol()); + mDuplicatedZ[layerDuplicated]->Fill(clusOriginalPointGlob.Z()); mDuplicatedPt[layerDuplicated]->Fill(pt); mDuplicatedPtEta[layerDuplicated]->Fill(pt, eta); 
mDuplicatedPtPhi[layerDuplicated]->Fill(pt, phiduplicated); @@ -713,12 +763,6 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) m3DClusterPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y(), clusDuplicatedPointGlob.z()); m2DClusterDuplicatedPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y()); - /// compute the distance between original and dubplicated cluster - mDistanceClustersX[layerClus]->Fill(abs(clusOriginalPointGlob.x() - clusDuplicatedPointGlob.x())); - mDistanceClustersY[layerClus]->Fill(abs(clusOriginalPointGlob.y() - clusDuplicatedPointGlob.y())); - mDistanceClustersZ[layerClus]->Fill(abs(clusOriginalPointGlob.z() - clusDuplicatedPointGlob.z())); - mDistanceClusters[layerClus]->Fill(std::hypot(clusOriginalPointGlob.x() - clusDuplicatedPointGlob.x(), clusOriginalPointGlob.y() - clusDuplicatedPointGlob.y(), clusOriginalPointGlob.z() - clusDuplicatedPointGlob.z())); - /// Compute the DCA between the cluster location and the track /// first propagate to the original cluster @@ -815,7 +859,6 @@ void EfficiencyStudy::countDuplicatedAfterCuts() rofNEntriesClus = mClustersROFRecords[iROF].getNEntries(); for (unsigned int iTrack = rofIndexTrack; iTrack < rofIndexTrack + rofNEntriesTrack; iTrack++) { // loop on tracks per ROF - // std::cout<<"Track number: "< mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { + // applying the cuts on the track - only eta + + if (eta < mEtaCuts[0] || eta >= mEtaCuts[1]) { continue; } @@ -860,38 +900,22 @@ void EfficiencyStudy::countDuplicatedAfterCuts() o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; + phiOriginal = clusOriginalPointGlob.phi(); // * 180 / M_PI; - /// applying the 
cuts on the phi of the original cluster - bool keepTrack = false; /// wether or not a cluster is found in an eligible track in the corresponding layer - - if (layerOriginal == 0) { - for (int i = 0; i < 10; i++) { - if ((phiOriginal >= mPhiCutsL0[i][0] && phiOriginal <= mPhiCutsL0[i][1])) { - possibleduplicated[0]++; - keepTrack = true; - } - } + if (abs(clusOriginalPointGlob.y()) < 0.5) { ///// excluding gap between bottom and top barrels + continue; } - if (layerOriginal == 1) { - for (int i = 0; i < 12; i++) { - if ((phiOriginal >= mPhiCutsL1[i][0] && phiOriginal <= mPhiCutsL1[i][1])) { - possibleduplicated[1]++; - keepTrack = true; - } - } + + if (abs(clusOriginalPointGlob.z()) >= 10) { /// excluding external z + continue; } - if (layerOriginal == 2) { - for (int i = 0; i < 17; i++) { - if ((phiOriginal >= mPhiCutsL2[i][0] && phiOriginal <= mPhiCutsL2[i][1])) { - possibleduplicated[2]++; - keepTrack = true; - } - } + + if (clusOriginal.getRow() < 2 || (clusOriginal.getRow() > 15 && clusOriginal.getRow() < 496) || clusOriginal.getRow() > 509) { //// cutting on the row + continue; } - if (!keepTrack) { - continue; /// if the track (cluster) is not eligible for any layer, go to the next one + if (clusOriginal.getCol() < 160 || clusOriginal.getCol() > 870) { /// excluding the gap between two chips in the same stave (comment to obtain the plot efficiency col vs eta) + continue; } for (auto& labT : labsTrack) { // for each valid label iterate over ALL the clusters in the ROF to see if there are duplicates @@ -912,7 +936,7 @@ void EfficiencyStudy::countDuplicatedAfterCuts() o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - phi = clusDuplicatedPointGlob.phi() * 180 / M_PI; + phi = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; auto labsClus = 
mClustersMCLCont->getLabels(iClus); // ideally I can have more than one label per cluster for (auto labC : labsClus) { @@ -940,7 +964,7 @@ void EfficiencyStudy::countDuplicatedAfterCuts() } duplicated[layer]++; - std::cout << "Taken L" << layer << " # " << duplicated[layer] << " : pt, eta, phi = " << pt << " , " << eta << " , " << phiOriginal << " Label: " << std::endl; + std::cout << "Taken L" << layer << " # " << duplicated[layer] << " : eta, phi = " << eta << " , " << phiOriginal << " Label: " << std::endl; labC.print(); } } @@ -1035,15 +1059,6 @@ void EfficiencyStudy::studyDCAcutsMC() float ip[2]; track.getImpactParams(0, 0, 0, 0, ip); - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { - continue; - } - - float phiTrack = trackParCov.getPhi() * 180 / M_PI; - float phi = -999.; float phiOriginal = -999.; int firstClus = track.getFirstClusterEntry(); // get the first cluster of the track @@ -1077,7 +1092,7 @@ void EfficiencyStudy::studyDCAcutsMC() o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; + phiOriginal = clusOriginalPointGlob.phi(); // * 180 / M_PI; for (auto& labT : labsOriginal) { // for each valid label iterate over ALL the clusters in the ROF to see if there are duplicates if (labT != tracklab) { @@ -1114,7 +1129,7 @@ void EfficiencyStudy::studyDCAcutsMC() o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - phi = clusDuplicatedPointGlob.phi() * 180 / M_PI; + phi = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; /// Compute 
the DCA between the duplicated cluster location and the track trackParCov.rotate(mGeometry->getSensorRefAlpha(clusDuplicated.getSensorID())); @@ -1150,7 +1165,6 @@ void EfficiencyStudy::studyDCAcutsMC() mnGoodMatchesPt_layer[layerDuplicated]->Fill(pt, i); mnGoodMatchesEta_layer[layerDuplicated]->Fill(eta, i); mnGoodMatchesPhi_layer[layerDuplicated]->Fill(phi, i); - mnGoodMatchesPhiTrack_layer[layerDuplicated]->Fill(phiTrack, i); mnGoodMatchesPhiOriginal_layer[layerDuplicated]->Fill(phiOriginal, i); } else { @@ -1159,7 +1173,6 @@ void EfficiencyStudy::studyDCAcutsMC() mnFakeMatchesPt_layer[layerDuplicated]->Fill(pt, i); mnFakeMatchesEta_layer[layerDuplicated]->Fill(eta, i); mnFakeMatchesPhi_layer[layerDuplicated]->Fill(phi, i); - mnFakeMatchesPhiTrack_layer[layerDuplicated]->Fill(phiTrack, i); } } else if (mVerboseOutput) { LOGP(info, "Check DCA failed"); @@ -1211,13 +1224,6 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyGoodMatchPhiOriginal_layer[l]->SetBinContent(iphi + 1, i + 1, mnGoodMatchesPhiOriginal_layer[l]->GetBinContent(iphi + 1, i + 1) / mPhiOriginalIfDuplicated[l]->GetBinContent(iphi + 1)); } } - - for (int iphi = 0; iphi < mPhiTrackDuplicated[l]->GetNbinsX(); iphi++) { - if (mPhiTrackDuplicated[l]->GetBinContent(iphi + 1) != 0) { - mEfficiencyGoodMatchPhiTrack_layer[l]->SetBinContent(iphi + 1, i + 1, mnGoodMatchesPhiTrack_layer[l]->GetBinContent(iphi + 1, i + 1) / mPhiTrackDuplicated[l]->GetBinContent(iphi + 1)); - } - mEfficiencyFakeMatchPhiTrack_layer[l]->SetBinContent(iphi + 1, i + 1, mnFakeMatchesPhiTrack_layer[l]->GetBinContent(iphi + 1, i + 1) / mPhiTrackDuplicated[l]->GetBinContent(iphi + 1)); - } } } for (int i = 0; i < NLAYERS; i++) { @@ -1243,8 +1249,6 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyGoodMatchEta_layer[l]->Write(); mEfficiencyGoodMatchPhi_layer[l]->GetZaxis()->SetRangeUser(0, 1); mEfficiencyGoodMatchPhi_layer[l]->Write(); - mEfficiencyGoodMatchPhiTrack_layer[l]->GetZaxis()->SetRangeUser(0, 1); - 
mEfficiencyGoodMatchPhiTrack_layer[l]->Write(); mEfficiencyGoodMatchPhiOriginal_layer[l]->GetZaxis()->SetRangeUser(0, 1); mEfficiencyGoodMatchPhiOriginal_layer[l]->Write(); mEfficiencyFakeMatchPt_layer[l]->GetZaxis()->SetRangeUser(0, 1); @@ -1253,8 +1257,6 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyFakeMatchEta_layer[l]->Write(); mEfficiencyFakeMatchPhi_layer[l]->GetZaxis()->SetRangeUser(0, 1); mEfficiencyFakeMatchPhi_layer[l]->Write(); - mEfficiencyFakeMatchPhiTrack_layer[l]->GetZaxis()->SetRangeUser(0, 1); - mEfficiencyFakeMatchPhiTrack_layer[l]->Write(); } mOutFile->mkdir("Efficiency/"); @@ -1289,7 +1291,6 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyTotal->Draw("same P l E1_NOSTAT PLC PMC"); leg->Draw("same"); c.Write(); - c.SaveAs("prova.png"); TCanvas cc[NLAYERS]; for (int l = 0; l < NLAYERS; l++) { @@ -1312,12 +1313,12 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyTotal_layer[l]->Draw("same P l E1_NOSTAT"); leg->Draw("same"); cc[l].Write(); - cc[l].SaveAs(Form("provaLayer%d.png", l)); } } void EfficiencyStudy::studyClusterSelectionMC() { + //// to be used only with MC // study to find a good selection method for the duplicated cluster, to be used for non-MC data // iterate over tracks an associated clusters, and find the closer cluster that is not the original one applying cuts on staveID and chipID // fix the DCA < 10 sigma, then compute the efficiency for each bin of pt, eta and phi and also in the rows @@ -1392,14 +1393,6 @@ void EfficiencyStudy::studyClusterSelectionMC() auto pt = trackParCov.getPt(); auto eta = trackParCov.getEta(); - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { - continue; - } - - // auto phi = trackParCov.getPhi()*180/M_PI; float phi = -999.; float phiOriginal = -999.; float phiDuplicated = -999.; @@ -1410,7 +1403,6 @@ void EfficiencyStudy::studyClusterSelectionMC() tracklab.print(); } for (int iclTrack = firstClus; iclTrack < firstClus + ncl; 
iclTrack++) { // loop on clusters associated to the track to extract layer, stave and chip to restrict the possible matches to be searched with the DCA cut - // LOGP(info, "New cluster"); auto& clusOriginal = mClusters[mInputITSidxs[iclTrack]]; auto layerOriginal = mGeometry->getLayer(clusOriginal.getSensorID()); if (layerOriginal >= NLAYERS) { @@ -1426,7 +1418,7 @@ void EfficiencyStudy::studyClusterSelectionMC() o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - auto phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; + auto phiOriginal = clusOriginalPointGlob.phi(); // * 180 / M_PI; auto labsOriginal = mClustersMCLCont->getLabels(mInputITSidxs[iclTrack]); // get labels of the cluster associated to the track (original) auto staveOriginal = mGeometry->getStave(clusOriginal.getSensorID()); @@ -1464,7 +1456,7 @@ void EfficiencyStudy::studyClusterSelectionMC() o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - auto phiDuplicated = clusDuplicatedPointGlob.phi() * 180 / M_PI; + auto phiDuplicated = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; /// Compute the DCA between the duplicated cluster location and the track trackParCov.rotate(mGeometry->getSensorRefAlpha(clusDuplicated.getSensorID())); @@ -1473,7 +1465,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } // Imposing that the distance between the original cluster and the duplicated one is less than x sigma - if (!(abs(meanDCAxyDuplicated[layerDuplicated] - clusDuplicatedDCA[0]) < 8 * sigmaDCAxyDuplicated[layerDuplicated] && abs(meanDCAzDuplicated[layerDuplicated] - clusDuplicatedDCA[1]) 
< 8 * sigmaDCAzDuplicated[layerDuplicated])) { + if (!(clusDuplicatedDCA[0] > mDCACutsXY[layerDuplicated][0] && clusDuplicatedDCA[0] < mDCACutsXY[layerDuplicated][1] && clusDuplicatedDCA[1] > mDCACutsZ[layerDuplicated][0] && clusDuplicatedDCA[1] < mDCACutsZ[layerDuplicated][1])) { continue; } @@ -1503,12 +1495,11 @@ void EfficiencyStudy::studyClusterSelectionMC() for (auto lab : std::get<2>(clusID_rDCA_label)) { if (lab == tracklab) { isGood = true; - diffPhivsPt[layerOriginal]->Fill(pt, abs(phi - phiOriginal)); - IPOriginalifDuplicatedxy[layerOriginal]->Fill(ip[0]); - IPOriginalifDuplicatedz[layerOriginal]->Fill(ip[1]); mNGoodMatchesPt[layerOriginal]->Fill(pt); mNGoodMatchesRow[layerOriginal]->Fill(row); + mNGoodMatchesCol[layerOriginal]->Fill(clusOriginal.getCol()); + mNGoodMatchesZ[layerOriginal]->Fill(clusOriginalPointGlob.Z()); mNGoodMatchesPtEta[layerOriginal]->Fill(pt, eta); mNGoodMatchesPtPhi[layerOriginal]->Fill(pt, phi); mNGoodMatchesEtaPhi[layerOriginal]->Fill(eta, phi); @@ -1529,6 +1520,8 @@ void EfficiencyStudy::studyClusterSelectionMC() mNFakeMatchesPt[layerOriginal]->Fill(pt); mNFakeMatchesRow[layerOriginal]->Fill(row); + mNFakeMatchesCol[layerOriginal]->Fill(clusOriginal.getCol()); + mNFakeMatchesZ[layerOriginal]->Fill(clusOriginalPointGlob.Z()); mNFakeMatchesPtEta[layerOriginal]->Fill(pt, eta); mNFakeMatchesPtPhi[layerOriginal]->Fill(pt, phi); mNFakeMatchesEtaPhi[layerOriginal]->Fill(eta, phi); @@ -1549,33 +1542,37 @@ void EfficiencyStudy::studyClusterSelectionMC() mOutFile->mkdir("EfficiencyCuts/"); mOutFile->cd("EfficiencyCuts/"); - std::cout << "------Calculatin efficiency..." 
<< std::endl; - TH1D* axpt = new TH1D("axpt", "", 1, 0.05, 7.5); - TH1D* axRow = new TH1D("axRow", "", 1, -0.5, 511.5); - TH2D* axptetaGood = new TH2D("axptetaGood", "", 1, 0.05, 7.5, 1, -2, 2); - TH2D* axptetaFake = new TH2D("axptetaFake", "", 1, 0.05, 7.5, 1, -2, 2); - TH2D* axptphiGood = new TH2D("axptphiGood", "", 1, 0.05, 7.5, 1, -180, 180); - TH2D* axptphiFake = new TH2D("axptphiFake", "", 1, 0.05, 7.5, 1, -180, 180); - TH2D* axetaphiGood = new TH2D("axetaphiGood", "", 1, -2, 2, 1, -180, 180); - TH2D* axetaphiFake = new TH2D("axetaphiFake", "", 1, -2, 2, 1, -180, 180); - TH1D* axetaAllPt = new TH1D("axetaAllPt", "", 1, -2, 2); - TH1D* axeta[NLAYERS]; - TH1D* axphi[NLAYERS]; + std::cout << "Calculating efficiency..." << std::endl; + std::unique_ptr axpt = std::make_unique("axpt", "", 1, 0.05, 7.5); + std::unique_ptr axRow = std::make_unique("axRow", "", 1, -0.5, 511.5); + std::unique_ptr axCol = std::make_unique("axRow", "", 1, -0.5, 1023.5); + std::unique_ptr axZ = std::make_unique("axZ", "", 1, -15, 15); + std::unique_ptr axptetaGood = std::make_unique("axptetaGood", "", 1, 0.05, 7.5, 1, -2, 2); + std::unique_ptr axptetaFake = std::make_unique("axptetaFake", "", 1, 0.05, 7.5, 1, -2, 2); + std::unique_ptr axptphiGood = std::make_unique("axptphiGood", "", 1, 0.05, 7.5, 1, -3.2, 3.2); + std::unique_ptr axptphiFake = std::make_unique("axptphiFake", "", 1, 0.05, 7.5, 1, -3.2, 3.2); + std::unique_ptr axetaphiGood = std::make_unique("axetaphiGood", "", 1, -2, 2, 1, -3.2, 3.2); + std::unique_ptr axetaphiFake = std::make_unique("axetaphiFake", "", 1, -2, 2, 1, -3.2, 3.2); + std::unique_ptr axetaAllPt = std::make_unique("axetaAllPt", "", 1, -2, 2); + std::unique_ptr axeta[NLAYERS]; + std::unique_ptr axphi[NLAYERS]; for (int ipt = 0; ipt < 3; ipt++) { - axeta[ipt] = new TH1D(Form("axeta%d", ipt), Form("axeta%d", ipt), 1, -2, 2); - axphi[ipt] = new TH1D(Form("axphi%d", ipt), Form("axphi%d", ipt), 1, -180, 180); + axeta[ipt] = std::make_unique(Form("axeta%d", ipt), 
Form("axeta%d", ipt), 1, -2, 2); + axphi[ipt] = std::make_unique(Form("axphi%d", ipt), Form("axphi%d", ipt), 1, -3.2, 3.2); } - TH1D* axphiAllPt = new TH1D("axphi", "", 1, -180, 180); - - TCanvas* effPt[NLAYERS]; - TCanvas* effRow[NLAYERS]; - TCanvas* effPtEta[NLAYERS][2]; - TCanvas* effPtPhi[NLAYERS][2]; - TCanvas* effEtaPhi[NLAYERS][2]; - TCanvas* effEtaAllPt[NLAYERS]; - TCanvas* effEta[NLAYERS][3]; - TCanvas* effPhiAllPt[NLAYERS]; - TCanvas* effPhi[NLAYERS][3]; + std::unique_ptr axphiAllPt = std::make_unique("axphi", "", 1, -3.2, 3.2); + + std::unique_ptr effPt[NLAYERS]; + std::unique_ptr effRow[NLAYERS]; + std::unique_ptr effCol[NLAYERS]; + std::unique_ptr effZ[NLAYERS]; + std::unique_ptr effPtEta[NLAYERS][2]; + std::unique_ptr effPtPhi[NLAYERS][2]; + std::unique_ptr effEtaPhi[NLAYERS][2]; + std::unique_ptr effEtaAllPt[NLAYERS]; + std::unique_ptr effEta[NLAYERS][3]; + std::unique_ptr effPhiAllPt[NLAYERS]; + std::unique_ptr effPhi[NLAYERS][3]; ///////////////// plotting results for (int l = 0; l < 3; l++) { @@ -1583,12 +1580,8 @@ void EfficiencyStudy::studyClusterSelectionMC() std::cout << "Pt L" << l << "\n\n"; } - diffPhivsPt[l]->Write(); - IPOriginalifDuplicatedxy[l]->Write(); - IPOriginalifDuplicatedz[l]->Write(); - // Pt - effPt[l] = new TCanvas(Form("effPt_L%d", l)); + effPt[l] = std::make_unique(Form("effPt_L%d", l)); mEffPtGood[l] = std::make_unique(*mNGoodMatchesPt[l], *mDuplicatedPt[l]); stileEfficiencyGraph(mEffPtGood[l], Form("mEffPtGood_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});Efficiency", l), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); @@ -1616,7 +1609,7 @@ void EfficiencyStudy::studyClusterSelectionMC() effPt[l]->Write(); // PtEtaGood - effPtEta[l][0] = new TCanvas(Form("effPtEtaGood_L%d", l)); + effPtEta[l][0] = std::make_unique(Form("effPtEtaGood_L%d", l)); mEffPtEtaGood[l] = std::make_unique(*mNGoodMatchesPtEta[l], *mDuplicatedPtEta[l]); stileEfficiencyGraph(mEffPtEtaGood[l], Form("mEffPtEtaGood_L%d", l), Form("L%d;#it{p}_{T} 
(GeV/#it{c});#eta;Efficiency", l), true); @@ -1647,7 +1640,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } // Row - effRow[l] = new TCanvas(Form("effRow_L%d", l)); + effRow[l] = std::make_unique(Form("effRow_L%d", l)); for (int ibin = 1; ibin <= mNGoodMatchesRow[l]->GetNbinsX(); ibin++) { std::cout << "--- Good Row: Npass = " << mNGoodMatchesRow[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedRow[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; @@ -1667,7 +1660,7 @@ void EfficiencyStudy::studyClusterSelectionMC() axRow->SetTitle(Form("L%d;Row;Efficiency", l)); axRow->GetYaxis()->SetRangeUser(-0.1, 1.1); - axRow->GetXaxis()->SetRangeUser(0.05, 7.5); + axRow->GetXaxis()->SetRangeUser(0, 512); axRow->Draw(); mEffRowGood[l]->Draw("same p"); mEffRowFake[l]->Draw("same p"); @@ -1678,8 +1671,72 @@ void EfficiencyStudy::studyClusterSelectionMC() legRow->Draw("same"); effRow[l]->Write(); + // Col + effCol[l] = std::make_unique(Form("effCol_L%d", l)); + + for (int ibin = 1; ibin <= mNGoodMatchesCol[l]->GetNbinsX(); ibin++) { + std::cout << "--- Good Col: Npass = " << mNGoodMatchesCol[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedCol[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; + } + + mEffColGood[l] = std::make_unique(*mNGoodMatchesCol[l], *mDuplicatedCol[l]); + stileEfficiencyGraph(mEffColGood[l], Form("mEffColGood_L%d", l), Form("L%d;Col;Efficiency", l), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + + for (int ibin = 1; ibin <= mNFakeMatchesCol[l]->GetNbinsX(); ibin++) { + if (mNFakeMatchesCol[l]->GetBinContent(ibin) > mDuplicatedCol[l]->GetBinContent(ibin)) { + std::cout << "--- Col: Npass = " << mNFakeMatchesCol[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedCol[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; + mNFakeMatchesCol[l]->SetBinContent(ibin, mDuplicatedCol[l]->GetBinContent(ibin)); + } + } + mEffColFake[l] = std::make_unique(*mNFakeMatchesCol[l], *mDuplicatedCol[l]); + 
stileEfficiencyGraph(mEffColFake[l], Form("mEffColFake_L%d", l), Form("L%d;Col;Efficiency", l), false, kFullDiamond, 1, kRed + 1, kRed + 1); + + axCol->SetTitle(Form("L%d;Col;Efficiency", l)); + axCol->GetYaxis()->SetRangeUser(-0.1, 1.1); + axCol->GetXaxis()->SetRangeUser(0, 1024); + axCol->Draw(); + mEffColGood[l]->Draw("same p"); + mEffColFake[l]->Draw("same p"); + + auto legCol = std::make_unique(0.70, 0.15, 0.89, 0.35); + legCol->AddEntry(mEffColGood[l].get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legCol->AddEntry(mEffColFake[l].get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legCol->Draw("same"); + effCol[l]->Write(); + + // Z + effZ[l] = std::make_unique(Form("effZ_L%d", l)); + + for (int ibin = 1; ibin <= mNGoodMatchesZ[l]->GetNbinsX(); ibin++) { + std::cout << "--- Good Z: Npass = " << mNGoodMatchesZ[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedZ[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; + } + + mEffZGood[l] = std::make_unique(*mNGoodMatchesZ[l], *mDuplicatedZ[l]); + stileEfficiencyGraph(mEffZGood[l], Form("mEffZGood_L%d", l), Form("L%d;Z;Efficiency", l), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + + for (int ibin = 1; ibin <= mNFakeMatchesZ[l]->GetNbinsX(); ibin++) { + if (mNFakeMatchesZ[l]->GetBinContent(ibin) > mDuplicatedZ[l]->GetBinContent(ibin)) { + std::cout << "--- Z: Npass = " << mNFakeMatchesZ[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedZ[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; + mNFakeMatchesZ[l]->SetBinContent(ibin, mDuplicatedZ[l]->GetBinContent(ibin)); + } + } + mEffZFake[l] = std::make_unique(*mNFakeMatchesZ[l], *mDuplicatedZ[l]); + stileEfficiencyGraph(mEffZFake[l], Form("mEffZFake_L%d", l), Form("L%d;Z;Efficiency", l), false, kFullDiamond, 1, kRed + 1, kRed + 1); + + axZ->SetTitle(Form("L%d;Z;Efficiency", l)); + axZ->GetYaxis()->SetRangeUser(-0.1, 1.1); + axZ->GetXaxis()->SetRangeUser(0, 512); + axZ->Draw(); + 
mEffZGood[l]->Draw("same p"); + mEffZFake[l]->Draw("same p"); + + auto legZ = std::make_unique(0.70, 0.15, 0.89, 0.35); + legZ->AddEntry(mEffZGood[l].get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legZ->AddEntry(mEffZFake[l].get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legZ->Draw("same"); + effZ[l]->Write(); + // PtEtaGood - effPtEta[l][0] = new TCanvas(Form("effPtEtaGood_L%d", l)); + effPtEta[l][0] = std::make_unique(Form("effPtEtaGood_L%d", l)); mEffPtEtaGood[l] = std::make_unique(*mNGoodMatchesPtEta[l], *mDuplicatedPtEta[l]); stileEfficiencyGraph(mEffPtEtaGood[l], Form("mEffPtEtaGood_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#eta;Efficiency", l), true); @@ -1710,7 +1767,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } // PtEtaFake - effPtEta[l][1] = new TCanvas(Form("effPtEtaFake_L%d", l)); + effPtEta[l][1] = std::make_unique(Form("effPtEtaFake_L%d", l)); mEffPtEtaFake[l] = std::make_unique(*mNFakeMatchesPtEta[l], *mDuplicatedPtEta[l]); stileEfficiencyGraph(mEffPtEtaFake[l], Form("mEffPtEtaFake_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#eta;Efficiency", l), true); @@ -1724,14 +1781,14 @@ void EfficiencyStudy::studyClusterSelectionMC() effPtEta[l][1]->Write(); // PtPhiGood - effPtPhi[l][0] = new TCanvas(Form("effPtPhiGood_L%d", l)); + effPtPhi[l][0] = std::make_unique(Form("effPtPhiGood_L%d", l)); mEffPtPhiGood[l] = std::make_unique(*mNGoodMatchesPtPhi[l], *mDuplicatedPtPhi[l]); - stileEfficiencyGraph(mEffPtPhiGood[l], Form("mEffPtPhiGood_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (deg);Efficiency", l), true); + stileEfficiencyGraph(mEffPtPhiGood[l], Form("mEffPtPhiGood_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (rad);Efficiency", l), true); - axptphiGood->SetTitle(Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (deg);Efficiency", l)); + axptphiGood->SetTitle(Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (rad);Efficiency", l)); axptphiGood->GetZaxis()->SetRangeUser(-0.1, 1.1); - 
axptphiGood->GetYaxis()->SetRangeUser(-180, 180); + axptphiGood->GetYaxis()->SetRangeUser(-3.2, 3.2); axptphiGood->GetXaxis()->SetRangeUser(0.05, 7.5); axptphiGood->Draw(); mEffPtPhiGood[l]->Draw("same colz"); @@ -1750,13 +1807,13 @@ void EfficiencyStudy::studyClusterSelectionMC() } // PtPhiFake - effPtPhi[l][1] = new TCanvas(Form("effPtPhiFake_L%d", l)); + effPtPhi[l][1] = std::make_unique(Form("effPtPhiFake_L%d", l)); mEffPtPhiFake[l] = std::make_unique(*mNFakeMatchesPtPhi[l], *mDuplicatedPtPhi[l]); - stileEfficiencyGraph(mEffPtPhiFake[l], Form("mEffPtPhiFake_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (deg);Efficiency", l), true); - axptphiFake->SetTitle(Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (deg);Efficiency", l)); + stileEfficiencyGraph(mEffPtPhiFake[l], Form("mEffPtPhiFake_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (rad);Efficiency", l), true); + axptphiFake->SetTitle(Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (rad);Efficiency", l)); axptphiFake->GetZaxis()->SetRangeUser(-0.1, 1.1); - axptphiFake->GetYaxis()->SetRangeUser(-180, 180); + axptphiFake->GetYaxis()->SetRangeUser(-3.2, 3.2); axptphiFake->GetXaxis()->SetRangeUser(0.05, 7.5); axptphiFake->Draw(); mEffPtPhiFake[l]->Draw("same colz"); @@ -1764,14 +1821,14 @@ void EfficiencyStudy::studyClusterSelectionMC() effPtPhi[l][1]->Write(); // EtaPhiGood - effEtaPhi[l][0] = new TCanvas(Form("effEtaPhiGood_L%d", l)); + effEtaPhi[l][0] = std::make_unique(Form("effEtaPhiGood_L%d", l)); mEffEtaPhiGood[l] = std::make_unique(*mNGoodMatchesEtaPhi[l], *mDuplicatedEtaPhi[l]); - stileEfficiencyGraph(mEffEtaPhiGood[l], Form("mEffEtaPhiGood_L%d", l), Form("L%d;#eta;#phi (deg);Efficiency", l), true); + stileEfficiencyGraph(mEffEtaPhiGood[l], Form("mEffEtaPhiGood_L%d", l), Form("L%d;#eta;#phi (rad);Efficiency", l), true); - axetaphiGood->SetTitle(Form("L%d;#eta;#phi (deg);Efficiency", l)); + axetaphiGood->SetTitle(Form("L%d;#eta;#phi (rad);Efficiency", l)); axetaphiGood->GetZaxis()->SetRangeUser(-0.1, 1.1); - 
axetaphiGood->GetYaxis()->SetRangeUser(-180, 180); + axetaphiGood->GetYaxis()->SetRangeUser(-3.2, 3.2); axetaphiGood->GetXaxis()->SetRangeUser(-2, 2); axetaphiGood->Draw(); mEffEtaPhiGood[l]->Draw("same colz"); @@ -1790,13 +1847,13 @@ void EfficiencyStudy::studyClusterSelectionMC() } // EtaPhiFake - effEtaPhi[l][1] = new TCanvas(Form("effEtaPhiFake_L%d", l)); + effEtaPhi[l][1] = std::make_unique(Form("effEtaPhiFake_L%d", l)); mEffEtaPhiFake[l] = std::make_unique(*mNFakeMatchesEtaPhi[l], *mDuplicatedEtaPhi[l]); - stileEfficiencyGraph(mEffEtaPhiFake[l], Form("mEffEtaPhiFake_L%d", l), Form("L%d;#eta;#phi (deg);Efficiency", l), true); - axetaphiFake->SetTitle(Form("L%d;#eta;#phi (deg);Efficiency", l)); + stileEfficiencyGraph(mEffEtaPhiFake[l], Form("mEffEtaPhiFake_L%d", l), Form("L%d;#eta;#phi (rad);Efficiency", l), true); + axetaphiFake->SetTitle(Form("L%d;#eta;#phi (rad);Efficiency", l)); axetaphiFake->GetZaxis()->SetRangeUser(-0.1, 1.1); - axetaphiFake->GetYaxis()->SetRangeUser(-180, 180); + axetaphiFake->GetYaxis()->SetRangeUser(-3.2, 3.2); axetaphiFake->GetXaxis()->SetRangeUser(-2, 2); axetaphiFake->Draw(); mEffEtaPhiFake[l]->Draw("same colz"); @@ -1808,7 +1865,7 @@ void EfficiencyStudy::studyClusterSelectionMC() std::cout << "Eta L" << l << "\n\n"; } - effEtaAllPt[l] = new TCanvas(Form("effEtaAllPt_L%d", l)); + effEtaAllPt[l] = std::make_unique(Form("effEtaAllPt_L%d", l)); mEffEtaGoodAllPt[l] = std::make_unique(*mNGoodMatchesEtaAllPt[l], *mDuplicatedEtaAllPt[l]); stileEfficiencyGraph(mEffEtaGoodAllPt[l], Form("mEffEtaGoodAllPt_L%d", l), Form("L%d;#eta;Efficiency", l), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); @@ -1840,7 +1897,7 @@ void EfficiencyStudy::studyClusterSelectionMC() /// eta and phi in different pt ranges for (int ipt = 0; ipt < 3; ipt++) { // eta - effEta[l][ipt] = new TCanvas(Form("effEta_L%d_pt%d", l, ipt)); + effEta[l][ipt] = std::make_unique(Form("effEta_L%d_pt%d", l, ipt)); mEffEtaGood[l][ipt] = std::make_unique(*mNGoodMatchesEta[l][ipt], 
*mDuplicatedEta[l][ipt]); stileEfficiencyGraph(mEffEtaGood[l][ipt], Form("mEffEtaGood_L%d_pt%d", l, ipt), Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#eta;Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); @@ -1871,7 +1928,7 @@ void EfficiencyStudy::studyClusterSelectionMC() effEta[l][ipt]->Write(); // phi - effPhi[l][ipt] = new TCanvas(Form("effPhi_L%d_pt%d", l, ipt)); + effPhi[l][ipt] = std::make_unique(Form("effPhi_L%d_pt%d", l, ipt)); for (int ibin = 1; ibin <= mNGoodMatchesPhi[l][ipt]->GetNbinsX(); ibin++) { if (mNGoodMatchesPhi[l][ipt]->GetBinContent(ibin) > mDuplicatedPhi[l][ipt]->GetBinContent(ibin)) { @@ -1883,7 +1940,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } mEffPhiGood[l][ipt] = std::make_unique(*mNGoodMatchesPhi[l][ipt], *mDuplicatedPhi[l][ipt]); - stileEfficiencyGraph(mEffPhiGood[l][ipt], Form("mEffPhiGood_L%d_pt%d", l, ipt), Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (deg);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + stileEfficiencyGraph(mEffPhiGood[l][ipt], Form("mEffPhiGood_L%d_pt%d", l, ipt), Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (rad);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); for (int ibin = 1; ibin <= mNFakeMatchesPhi[l][ipt]->GetNbinsX(); ibin++) { if (mNFakeMatchesPhi[l][ipt]->GetBinContent(ibin) > mDuplicatedPhi[l][ipt]->GetBinContent(ibin)) { @@ -1895,9 +1952,9 @@ void EfficiencyStudy::studyClusterSelectionMC() } mEffPhiFake[l][ipt] = std::make_unique(*mNFakeMatchesPhi[l][ipt], *mDuplicatedPhi[l][ipt]); - stileEfficiencyGraph(mEffPhiFake[l][ipt], Form("mEffPhiFake_L%d_pt%d", l, ipt), Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (deg);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kRed + 1, kRed + 1); + stileEfficiencyGraph(mEffPhiFake[l][ipt], Form("mEffPhiFake_L%d_pt%d", l, ipt), 
Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (rad);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kRed + 1, kRed + 1); - axphi[ipt]->SetTitle(Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (deg);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1])); + axphi[ipt]->SetTitle(Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (rad);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1])); axphi[ipt]->GetYaxis()->SetRangeUser(-0.1, 1.1); axphi[ipt]->Draw(); @@ -1916,7 +1973,7 @@ void EfficiencyStudy::studyClusterSelectionMC() std::cout << "Phi L" << l << "\n\n"; } - effPhiAllPt[l] = new TCanvas(Form("effPhiAllPt_L%d", l)); + effPhiAllPt[l] = std::make_unique(Form("effPhiAllPt_L%d", l)); for (int ibin = 1; ibin <= mNGoodMatchesPhiAllPt[l]->GetNbinsX(); ibin++) { if (mNGoodMatchesPhiAllPt[l]->GetBinContent(ibin) > mDuplicatedPhiAllPt[l]->GetBinContent(ibin)) { @@ -1953,85 +2010,170 @@ void EfficiencyStudy::studyClusterSelectionMC() legPhi->Draw("same"); effPhiAllPt[l]->Write(); } -} - -void EfficiencyStudy::saveDataInfo() -{ - // save histograms for data (phi, eta, pt,...) 
- LOGP(info, "--------------- saveDataInfo"); - - unsigned int rofIndexTrack = 0; - unsigned int rofNEntriesTrack = 0; - unsigned int rofIndexClus = 0; - unsigned int rofNEntriesClus = 0; - unsigned int totClus = 0; - - for (unsigned int iROF = 0; iROF < mTracksROFRecords.size(); iROF++) { // loop on ROFRecords array - rofIndexTrack = mTracksROFRecords[iROF].getFirstEntry(); - rofNEntriesTrack = mTracksROFRecords[iROF].getNEntries(); - - rofIndexClus = mClustersROFRecords[iROF].getFirstEntry(); - rofNEntriesClus = mClustersROFRecords[iROF].getNEntries(); - - for (unsigned int iTrack = rofIndexTrack; iTrack < rofIndexTrack + rofNEntriesTrack; iTrack++) { // loop on tracks per ROF - auto track = mTracks[iTrack]; - o2::track::TrackParCov trackParCov = mTracks[iTrack]; - int firstClus = track.getFirstClusterEntry(); // get the first cluster of the track - int ncl = track.getNumberOfClusters(); // get the number of clusters of the track - - if (ncl < 7) { - continue; - } - float ip[2]; - track.getImpactParams(0, 0, 0, 0, ip); - - auto pt = trackParCov.getPt(); - auto eta = trackParCov.getEta(); - - float phiTrack = trackParCov.getPhi() * 180 / M_PI; - - // if (pt < mPtCuts[0] || pt > mPtCuts[1]) continue; - // if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) continue; - float phioriginal = 0; - float phiduplicated = 0; - - for (int iclTrack = firstClus; iclTrack < firstClus + ncl; iclTrack++) { // loop on clusters associated to the track - auto& clusOriginal = mClusters[mInputITSidxs[iclTrack]]; - auto clusOriginalPoint = mITSClustersArray[mInputITSidxs[iclTrack]]; // cluster spacepoint in the tracking system - auto staveOriginal = mGeometry->getStave(clusOriginal.getSensorID()); - auto chipOriginal = mGeometry->getChipIdInStave(clusOriginal.getSensorID()); - - auto layer = mGeometry->getLayer(clusOriginal.getSensorID()); - if (layer >= NLAYERS) { - continue; // checking only selected layers - } - - o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), 
clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; - o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - - phioriginal = clusOriginalPointGlob.phi() * 180 / M_PI; - - mPhiOriginal[layer]->Fill(phioriginal); - mPhiTrackOriginal[layer]->Fill(phiTrack); - mPtOriginal[layer]->Fill(pt); - mEtaOriginal[layer]->Fill(eta); - m3DClusterPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y(), clusOriginalPointGlob.z()); - m2DClusterOriginalPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y()); - } // end loop on clusters - totClus += ncl; - } // end loop on tracks per ROF - } // end loop on ROFRecords array - LOGP(info, "Total number of clusters: {} ", totClus); + /// all Row + std::unique_ptr effRowAll = std::make_unique("effRowAll"); + auto numRowGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesRow[0]->Clone("numRowGoodAll")); + numRowGoodAll->Add(mNGoodMatchesRow[1].get()); + numRowGoodAll->Add(mNGoodMatchesRow[2].get()); + numRowGoodAll->Write(); + auto numRowFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesRow[0]->Clone("numRowFakeAll")); + numRowFakeAll->Add(mNFakeMatchesRow[1].get()); + numRowFakeAll->Add(mNFakeMatchesRow[2].get()); + numRowFakeAll->Write(); + auto denRowAll = std::unique_ptr((TH1D*)mDuplicatedRow[0]->Clone("denRowAll")); + denRowAll->Add(mDuplicatedRow[1].get()); + denRowAll->Add(mDuplicatedRow[2].get()); + denRowAll->Write(); + + std::unique_ptr mEffRowGoodAll = std::make_unique(*numRowGoodAll, *denRowAll); + stileEfficiencyGraph(mEffRowGoodAll, "mEffRowGoodAll", "L0 + L1 + L2;Row;Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffRowFakeAll = std::make_unique(*numRowFakeAll, *denRowAll); + stileEfficiencyGraph(mEffRowFakeAll, "mEffRowFakeAll", "L0 + L1 + L2;Row;Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axRow->SetTitle("L0 + L1 + L2;Row;Efficiency"); + axRow->GetYaxis()->SetRangeUser(-0.1, 1.1); + 
axRow->Draw(); + mEffRowGoodAll->Draw("same p"); + mEffRowFakeAll->Draw("same p"); + + auto legRow = std::make_unique(0.70, 0.15, 0.89, 0.35); + legRow->AddEntry(mEffRowGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legRow->AddEntry(mEffRowFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legRow->Draw("same"); + effRowAll->Write(); + + /// all Col + std::unique_ptr effColAll = std::make_unique("effColAll"); + auto numColGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesCol[0]->Clone("numColGoodAll")); + numColGoodAll->Add(mNGoodMatchesCol[1].get()); + numColGoodAll->Add(mNGoodMatchesCol[2].get()); + numColGoodAll->Write(); + auto numColFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesCol[0]->Clone("numColFakeAll")); + numColFakeAll->Add(mNFakeMatchesCol[1].get()); + numColFakeAll->Add(mNFakeMatchesCol[2].get()); + numColFakeAll->Write(); + auto denColAll = std::unique_ptr((TH1D*)mDuplicatedCol[0]->Clone("denColAll")); + denColAll->Add(mDuplicatedCol[1].get()); + denColAll->Add(mDuplicatedCol[2].get()); + denColAll->Write(); + + std::unique_ptr mEffColGoodAll = std::make_unique(*numColGoodAll, *denColAll); + stileEfficiencyGraph(mEffColGoodAll, "mEffColGoodAll", "L0 + L1 + L2;Column;Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffColFakeAll = std::make_unique(*numColFakeAll, *denColAll); + stileEfficiencyGraph(mEffColFakeAll, "mEffColFakeAll", "L0 + L1 + L2;Column;Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axCol->SetTitle("L0 + L1 + L2;Col;Efficiency"); + axCol->GetYaxis()->SetRangeUser(-0.1, 1.1); + axCol->Draw(); + mEffColGoodAll->Draw("same p"); + mEffColFakeAll->Draw("same p"); + + auto legCol = std::make_unique(0.70, 0.15, 0.89, 0.35); + legCol->AddEntry(mEffColGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legCol->AddEntry(mEffColFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legCol->Draw("same"); + 
effColAll->Write(); + + /// all Z + std::unique_ptr effZAll = std::make_unique("effZAll"); + auto numZGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesZ[0]->Clone("numZGoodAll")); + numZGoodAll->Add(mNGoodMatchesZ[1].get()); + numZGoodAll->Add(mNGoodMatchesZ[2].get()); + numZGoodAll->Write(); + auto numZFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesZ[0]->Clone("numZFakeAll")); + numZFakeAll->Add(mNFakeMatchesZ[1].get()); + numZFakeAll->Add(mNFakeMatchesZ[2].get()); + numZFakeAll->Write(); + auto denZAll = std::unique_ptr((TH1D*)mDuplicatedZ[0]->Clone("denZAll")); + denZAll->Add(mDuplicatedZ[1].get()); + denZAll->Add(mDuplicatedZ[2].get()); + denZAll->Write(); + + std::unique_ptr mEffZGoodAll = std::make_unique(*numZGoodAll, *denZAll); + stileEfficiencyGraph(mEffZGoodAll, "mEffZGoodAll", "L0 + L1 + L2;Z;Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffZFakeAll = std::make_unique(*numZFakeAll, *denZAll); + stileEfficiencyGraph(mEffZFakeAll, "mEffZFakeAll", "L0 + L1 + L2;Z;Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axZ->SetTitle("L0 + L1 + L2;Z;Efficiency"); + axZ->GetYaxis()->SetRangeUser(-0.1, 1.1); + axZ->Draw(); + mEffZGoodAll->Draw("same p"); + mEffZFakeAll->Draw("same p"); + + auto legZ = std::make_unique(0.70, 0.15, 0.89, 0.35); + legZ->AddEntry(mEffZGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legZ->AddEntry(mEffZFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legZ->Draw("same"); + effZAll->Write(); + + /// all Eta + std::unique_ptr effEtaAll = std::make_unique("effEtaAll"); + auto numEtaGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesEtaAllPt[0]->Clone("numEtaGoodAll")); + numEtaGoodAll->Add(mNGoodMatchesEtaAllPt[1].get()); + numEtaGoodAll->Add(mNGoodMatchesEtaAllPt[2].get()); + numEtaGoodAll->Write(); + auto numEtaFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesEtaAllPt[0]->Clone("numEtaFakeAll")); + numEtaFakeAll->Add(mNFakeMatchesEtaAllPt[1].get()); + 
numEtaFakeAll->Add(mNFakeMatchesEtaAllPt[2].get()); + numEtaFakeAll->Write(); + auto denEtaAll = std::unique_ptr((TH1D*)mDuplicatedEtaAllPt[0]->Clone("denEtaAll")); + denEtaAll->Add(mDuplicatedEtaAllPt[1].get()); + denEtaAll->Add(mDuplicatedEtaAllPt[2].get()); + denEtaAll->Write(); + + std::unique_ptr mEffEtaGoodAll = std::make_unique(*numEtaGoodAll, *denEtaAll); + stileEfficiencyGraph(mEffEtaGoodAll, "mEffEtaGoodAll", "L0 + L1 + L2;#Eta;Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffEtaFakeAll = std::make_unique(*numEtaFakeAll, *denEtaAll); + stileEfficiencyGraph(mEffEtaFakeAll, "mEffEtaFakeAll", "L0 + L1 + L2;#Eta;Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axetaAllPt->SetTitle("L0 + L1 + L2;Eta;Efficiency"); + axetaAllPt->GetYaxis()->SetRangeUser(-0.1, 1.1); + axetaAllPt->Draw(); + mEffEtaGoodAll->Draw("same p"); + mEffEtaFakeAll->Draw("same p"); + + auto legEta = std::make_unique(0.70, 0.15, 0.89, 0.35); + legEta->AddEntry(mEffEtaGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legEta->AddEntry(mEffEtaFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legEta->Draw("same"); + effEtaAll->Write(); + + /// all Phi + std::unique_ptr effPhiAll = std::make_unique("effPhiAll"); + auto numPhiGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesPhiAllPt[0]->Clone("numPhiGoodAll")); + numPhiGoodAll->Add(mNGoodMatchesPhiAllPt[1].get()); + numPhiGoodAll->Add(mNGoodMatchesPhiAllPt[2].get()); + numPhiGoodAll->Write(); + auto numPhiFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesPhiAllPt[0]->Clone("numPhiFakeAll")); + numPhiFakeAll->Add(mNFakeMatchesPhiAllPt[1].get()); + numPhiFakeAll->Add(mNFakeMatchesPhiAllPt[2].get()); + numPhiFakeAll->Write(); + auto denPhiAll = std::unique_ptr((TH1D*)mDuplicatedPhiAllPt[0]->Clone("denPhiAll")); + denPhiAll->Add(mDuplicatedPhiAllPt[1].get()); + denPhiAll->Add(mDuplicatedPhiAllPt[2].get()); + denPhiAll->Write(); + + std::unique_ptr mEffPhiGoodAll 
= std::make_unique(*numPhiGoodAll, *denPhiAll); + stileEfficiencyGraph(mEffPhiGoodAll, "mEffPhiGoodAll", "L0 + L1 + L2;#Phi (rad);Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffPhiFakeAll = std::make_unique(*numPhiFakeAll, *denPhiAll); + stileEfficiencyGraph(mEffPhiFakeAll, "mEffPhiFakeAll", "L0 + L1 + L2;#Phi (rad);Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axphiAllPt->SetTitle("L0 + L1 + L2;Phi;Efficiency"); + axphiAllPt->GetYaxis()->SetRangeUser(-0.1, 1.1); + axphiAllPt->Draw(); + mEffPhiGoodAll->Draw("same p"); + mEffPhiFakeAll->Draw("same p"); + + auto legPhi = std::make_unique(0.70, 0.15, 0.89, 0.35); + legPhi->AddEntry(mEffPhiGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legPhi->AddEntry(mEffPhiFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legPhi->Draw("same"); + effPhiAll->Write(); } void EfficiencyStudy::getEfficiency(bool isMC) { // Extract the efficiency for the IB, exploiting the staves overlaps and the duplicated clusters for the tracks passing through the overlaps - // The denominator for the efficiency calculation will be the number of tracks per layer fulfilling some cuts (DCA, phi, eta, pt) + // The denominator for the efficiency calculation will be the number of tracks per layer fulfilling some cuts (eta, z, row, col) // The numerator will be the number of duplicated clusters for the tracks passing through the overlaps - LOGP(info, "--------------- getEfficiency"); + LOGP(info, "getEfficiency()"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; @@ -2041,8 +2183,6 @@ void EfficiencyStudy::getEfficiency(bool isMC) unsigned int rofNEntriesTrack = 0; unsigned int rofIndexClus = 0; unsigned int rofNEntriesClus = 0; - int nLabels = 0; - unsigned int totClus = 0; int nbPt = 75; double xbins[nbPt + 1], ptcutl = 0.05, ptcuth = 7.5; @@ 
-2054,7 +2194,6 @@ void EfficiencyStudy::getEfficiency(bool isMC) int totNClusters; int nDuplClusters; - // denominator fot the efficiency calculation for (unsigned int iROF = 0; iROF < mTracksROFRecords.size(); iROF++) { // loop on ROFRecords array rofIndexTrack = mTracksROFRecords[iROF].getFirstEntry(); @@ -2068,7 +2207,7 @@ void EfficiencyStudy::getEfficiency(bool isMC) auto track = mTracks[iTrack]; o2::track::TrackParCov trackParCov = mTracks[iTrack]; - auto pt = trackParCov.getPt(); + auto pt = trackParCov.getPt(); // Always 0.6 GeV/c for B = 0 T auto eta = trackParCov.getEta(); float phi = -999.; float phiOriginal = -999.; @@ -2078,24 +2217,17 @@ void EfficiencyStudy::getEfficiency(bool isMC) float ip[2]; track.getImpactParams(0, 0, 0, 0, ip); - float phiTrack = trackParCov.getPhi() * 180 / M_PI; + // float phiTrack = trackParCov.getPhi(); // * 180 / M_PI; - // applying the cuts on the track - only pt and eta, and chi2 cuts since for phi(cluster) the layer is needed - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { + // applying the cuts on the track - only eta + if (eta < mEtaCuts[0] || eta >= mEtaCuts[1]) { continue; } - if (chi2 > mChi2cut) { - continue; - } - - /// the cut on phi, since it is layer-dependent, can be applied only after finding the cluster and then the layer int firstClus = track.getFirstClusterEntry(); // get the first cluster of the track int ncl = track.getNumberOfClusters(); // get the number of clusters of the track + //// keeping only 7 clusters track to reduce fakes if (ncl < 7) { continue; } @@ -2109,7 +2241,7 @@ void EfficiencyStudy::getEfficiency(bool isMC) } if (mVerboseOutput && isMC) { - LOGP(info, "--------- track Label: "); + LOGP(info, "track Label: "); tracklab.print(); } @@ -2119,358 +2251,73 @@ void EfficiencyStudy::getEfficiency(bool isMC) auto layerOriginal = mGeometry->getLayer(clusOriginal.getSensorID()); UShort_t rowOriginal = clusOriginal.getRow(); + UShort_t 
colOriginal = clusOriginal.getCol(); + /// filling some chip maps + if (clusOriginal.getChipID() >= 0 && clusOriginal.getChipID() <= 8) { + l0_00->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9)), clusOriginal.getRow()); + } + if (clusOriginal.getChipID() >= 252 && clusOriginal.getChipID() <= 260) { + l1_15->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9)), clusOriginal.getRow()); + } + if (clusOriginal.getChipID() >= 423 && clusOriginal.getChipID() <= 431) { + l2_19->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9)), clusOriginal.getRow()); + } + + //// only IB if (layerOriginal >= NLAYERS) { continue; } + chipmap->Fill(clusOriginal.getCol(), clusOriginal.getRow()); + IPOriginalxy[layerOriginal]->Fill(ip[0]); IPOriginalz[layerOriginal]->Fill(ip[1]); + ///// cluster point and conversion from track local coordinates to global coordinates o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - // phiOriginal = std::(clusOriginalPointGlob.y(), clusOriginalPointGlob.x()) * 180 / M_PI + 180; - phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; - - mXoriginal->Fill(clusOriginalPointGlob.x()); - mYoriginal->Fill(clusOriginalPointGlob.y()); - mZoriginal->Fill(clusOriginalPointGlob.z()); - - // std::cout<<" Layer: "<Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y()); - m3DClusterPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y(), clusOriginalPointGlob.z()); - - /// applying the cuts on the phi of the original cluster - bool keepTrack = false; /// wether or not a cluster is found in an eligible track in the corresponding layer - if (layerOriginal == 0) { - - for (int i = 0; i < 10; i++) { - if ((phiOriginal >= mPhiCutsL0[i][0] && phiOriginal <= mPhiCutsL0[i][1])) { - keepTrack = true; - } 
- } - } - if (layerOriginal == 1) { - for (int i = 0; i < 12; i++) { - if ((phiOriginal >= mPhiCutsL1[i][0] && phiOriginal <= mPhiCutsL1[i][1])) { - keepTrack = true; - } - } - } - if (layerOriginal == 2) { - for (int i = 0; i < 17; i++) { - if ((phiOriginal >= mPhiCutsL2[i][0] && phiOriginal <= mPhiCutsL2[i][1])) { - keepTrack = true; - } - } - } - - ///////////////////////////////////// - if (!(keepTrack)) { - continue; /// if the track (cluster) is not eligible for any layer, go to the next one - } else { /// fill the den and go ahead - chi2trackAccepted->Fill(chi2); - denPt[layerOriginal]->Fill(pt); - denPhi[layerOriginal]->Fill(phiOriginal); - denEta[layerOriginal]->Fill(eta); - nTracksSelected[layerOriginal]++; + if (abs(clusOriginalPointGlob.y()) < 0.5) { ///// excluding gap between bottom and top barrels + continue; } - /// if the cuts up to here are passed, then search for the duplicated cluster, otherwise go to the next cluster - gsl::span labsOriginal = {}; - if (isMC) { - labsOriginal = mClustersMCLCont->getLabels(mInputITSidxs[iclTrack]); // get labels of the cluster associated to the track (original) + if (abs(clusOriginalPointGlob.z()) >= 10) { /// excluding external z + continue; } - auto staveOriginal = mGeometry->getStave(clusOriginal.getSensorID()); - auto chipOriginal = mGeometry->getChipIdInStave(clusOriginal.getSensorID()); - - std::tuple> clusID_rDCA_label = {0, 999., gsl::span()}; // inizializing tuple with dummy values (if data, ignore the third value) - - bool adjacentFound = 0; - float phiDuplicated = -999.; - float ptDuplicated = -999.; - float etaDuplicated = -999.; - float clusZ = -999.; - /// for each original cluster iterate over all the possible duplicated clusters to see first wether increment or not the denominator (if a track has a possible duplicated cluster in the selected phi region) - /// then if the phi is within the cuts, select the "adjacent" clusters (stave +-1, chip =,+-1) and calculate the DCA with the track. 
Then choose the closest one. - // std::cout<<"Loop on clusters 2"< clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; - o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - phi = clusDuplicatedPointGlob.phi() * 180 / M_PI; - - //// applying constraints: the cluster should be on the same layer, should be on an adjacent stave and on the same or adjacent chip position - if (clusDuplicated.getSensorID() == clusOriginal.getSensorID()) { - continue; - } - auto layerDuplicated = mGeometry->getLayer(clusDuplicated.getSensorID()); - if (layerDuplicated != layerOriginal) { - continue; - } - auto staveDuplicated = mGeometry->getStave(clusDuplicated.getSensorID()); - if (abs(staveDuplicated - staveOriginal) != 1) { - continue; - } - auto chipDuplicated = mGeometry->getChipIdInStave(clusDuplicated.getSensorID()); - if (abs(chipDuplicated - chipOriginal) > 1) { - continue; - } - - gsl::span labsDuplicated = {}; - if (isMC) { - labsDuplicated = mClustersMCLCont->getLabels(iClus); - } - - /// if the cheks are passed, then calculate the DCA - /// Compute the DCA between the duplicated cluster location and the track - trackParCov.rotate(mGeometry->getSensorRefAlpha(clusDuplicated.getSensorID())); - if (!propagator->propagateToDCA(clusDuplicatedPointGlob, trackParCov, b, 2.f, matCorr, &clusDuplicatedDCA)) { // check if the propagation fails - continue; - } - - DCAxyData[layerDuplicated]->Fill(clusDuplicatedDCA[0]); - DCAzData[layerDuplicated]->Fill(clusDuplicatedDCA[1]); - // std::cout<<"DCA: "<Fill(abs(clusDuplicatedPointGlob.x() - clusOriginalPointGlob.x())); - DistanceClustersY[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.y() - clusOriginalPointGlob.y())); - DistanceClustersZ[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.z() - clusOriginalPointGlob.z())); - - // Imposing that the distance between the duplicated cluster and the 
track is less than x sigma - if (!(clusDuplicatedDCA[0] > mDCACutsXY[layerDuplicated][0] && clusDuplicatedDCA[0] < mDCACutsXY[layerDuplicated][1] && clusDuplicatedDCA[1] > mDCACutsZ[layerDuplicated][0] && clusDuplicatedDCA[1] < mDCACutsZ[layerDuplicated][1])) { - DCAxyRejected[layerDuplicated]->Fill(clusDuplicatedDCA[0]); - DCAzRejected[layerDuplicated]->Fill(clusDuplicatedDCA[1]); - continue; - } - - m2DClusterDuplicatedPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y()); - m3DDuplicatedClusterPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y(), clusDuplicatedPointGlob.z()); - - mXduplicated->Fill(clusDuplicatedPointGlob.x()); - mYduplicated->Fill(clusDuplicatedPointGlob.y()); - mZduplicated->Fill(clusDuplicatedPointGlob.z()); - - IPOriginalifDuplicatedxy[layerOriginal]->Fill(ip[0]); - IPOriginalifDuplicatedz[layerOriginal]->Fill(ip[1]); - - DistanceClustersXAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.x() - clusOriginalPointGlob.x())); - DistanceClustersYAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.y() - clusOriginalPointGlob.y())); - DistanceClustersZAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.z() - clusOriginalPointGlob.z())); - - if (mVerboseOutput) { - LOGP(info, "Propagation ok"); - } - double rDCA = std::hypot(clusDuplicatedDCA[0], clusDuplicatedDCA[1]); - - // taking the closest cluster within x sigma - if (rDCA < std::get<1>(clusID_rDCA_label)) { // updating the closest cluster - if (isMC) { - clusID_rDCA_label = {iClus, rDCA, labsDuplicated}; - } else { - clusID_rDCA_label = {iClus, rDCA, gsl::span()}; - } - phiDuplicated = phiOriginal; - ptDuplicated = pt; - etaDuplicated = eta; - clusZ = clusOriginalPointGlob.z(); - } - adjacentFound = 1; - } // end loop on all the clusters in the rof -> at this point we have the information on the closest cluster (if there is one) - - // here clusID_rDCA_label is updated with the closest cluster to the track other than the 
original one - - if (!adjacentFound) { + if (rowOriginal < 2 || (rowOriginal > 15 && rowOriginal < 496) || rowOriginal > 509) { //// cutting on the row continue; } - nDuplClusters++; - nDuplicatedClusters[layerOriginal]++; - numPt[layerOriginal]->Fill(ptDuplicated); - numPhi[layerOriginal]->Fill(phiDuplicated); - numEta[layerOriginal]->Fill(etaDuplicated); - mZvsPhiDUplicated[layerOriginal]->Fill(clusZ, phiDuplicated); - // checking if it is a good or fake match looking at the labels (only if isMC) - if (isMC) { - bool isGood = false; - for (auto lab : std::get<2>(clusID_rDCA_label)) { - if (lab == tracklab) { - isGood = true; - numPtGood[layerOriginal]->Fill(ptDuplicated); - numPhiGood[layerOriginal]->Fill(phiDuplicated); - numEtaGood[layerOriginal]->Fill(etaDuplicated); - continue; - } - } - if (!isGood) { - numPtFake[layerOriginal]->Fill(ptDuplicated); - numPhiFake[layerOriginal]->Fill(phiDuplicated); - numEtaFake[layerOriginal]->Fill(etaDuplicated); + if (mUseMC) { //// excluding known bad chips in MC which are not bad in data --- to be checked based on the anchored run + if (std::find(mExcludedChipMC.begin(), mExcludedChipMC.end(), clusOriginal.getChipID()) != mExcludedChipMC.end()) { + continue; } } - } // end loop on clusters associated to the track - totNClusters += NLAYERS; - } // end loop on tracks per ROF - } // end loop on ROFRecords array - std::cout << " Num of duplicated clusters L0: " << nDuplicatedClusters[0] << " N tracks selected: " << nTracksSelected[0] << std::endl; - std::cout << " Num of duplicated clusters L1: " << nDuplicatedClusters[1] << " N tracks selected: " << nTracksSelected[1] << std::endl; - std::cout << " Num of duplicated clusters L2: " << nDuplicatedClusters[2] << " N tracks selected: " << nTracksSelected[2] << std::endl; - - std::cout << " --------- N total clusters: " << totNClusters << std::endl; - std::cout << " --------- N duplicated clusters: " << nDuplClusters << std::endl; -} - -void 
EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) -{ - // Extract the efficiency for the IB, exploiting the staves overlaps and the duplicated clusters for the tracks passing through the overlaps - // The denominator for the efficiency calculation will be the number of tracks per layer fulfilling some cuts (DCA, phi, eta, pt) - // The numerator will be the number of duplicated clusters for the tracks passing through the overlaps - // additionally, print/save info (to be used in MC) - - LOGP(info, "--------------- getEfficiency"); - - o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; - auto propagator = o2::base::Propagator::Instance(); - - unsigned int rofIndexTrack = 0; - unsigned int rofNEntriesTrack = 0; - unsigned int rofIndexClus = 0; - unsigned int rofNEntriesClus = 0; - int nLabels = 0; - unsigned int totClus = 0; - - int nbPt = 75; - double xbins[nbPt + 1], ptcutl = 0.05, ptcuth = 7.5; - double a = std::log(ptcuth / ptcutl) / nbPt; - for (int i = 0; i <= nbPt; i++) { - xbins[i] = ptcutl * std::exp(i * a); - } - - int totNClusters; - int nDuplClusters; - - // denominator fot the efficiency calculation - for (unsigned int iROF = 0; iROF < mTracksROFRecords.size(); iROF++) { // loop on ROFRecords array - - rofIndexTrack = mTracksROFRecords[iROF].getFirstEntry(); - rofNEntriesTrack = mTracksROFRecords[iROF].getNEntries(); - - rofIndexClus = mClustersROFRecords[iROF].getFirstEntry(); - rofNEntriesClus = mClustersROFRecords[iROF].getNEntries(); - - ////// imposing cuts on the tracks = collecting tracks for the denominator - for (unsigned int iTrack = rofIndexTrack; iTrack < rofIndexTrack + rofNEntriesTrack; iTrack++) { // loop on tracks per ROF - auto track = mTracks[iTrack]; - o2::track::TrackParCov trackParCov = mTracks[iTrack]; - - auto pt = trackParCov.getPt(); - auto eta = trackParCov.getEta(); - float phi = -999.; - float phiOriginal = -999.; - - 
float chi2 = track.getChi2(); - - chi2track->Fill(chi2); - - float phiTrack = trackParCov.getPhi() * 180 / M_PI; - - // applying the cuts on the track - only pt and eta cuts since for phi(cluster) the layer is needed - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { - continue; - } - if (chi2 > mChi2cut) { - continue; - } - /// the cut on phi, since it is layer-dependent, can be applied only after finding the cluster and then the layer - - int firstClus = track.getFirstClusterEntry(); // get the first cluster of the track - int ncl = track.getNumberOfClusters(); // get the number of clusters of the track - - if (ncl < 7) { - continue; - } - - o2::MCCompLabel tracklab; - if (isMC) { - tracklab = mTracksMCLabels[iTrack]; - if (tracklab.isFake()) { + if (clusOriginal.getCol() < 160 || clusOriginal.getCol() > 870) { /// excluding the gap between two chips in the same stave (comment to obtain the plot efficiency col vs eta) continue; } - } - - if (mVerboseOutput && isMC) { - LOGP(info, "--------- track Label: "); - tracklab.print(); - } - - for (int iclTrack = firstClus; iclTrack < firstClus + ncl; iclTrack++) { // loop on clusters associated to the track to extract layer, stave and chip to restrict the possible matches to be searched with the DCA cut - auto& clusOriginal = mClusters[mInputITSidxs[iclTrack]]; - auto clusOriginalPoint = mITSClustersArray[mInputITSidxs[iclTrack]]; - auto layerOriginal = mGeometry->getLayer(clusOriginal.getSensorID()); - - UShort_t rowOriginal = clusOriginal.getRow(); - - if (layerOriginal >= NLAYERS) { - continue; - } - - o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; - o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; - - mXoriginal->Fill(clusOriginalPointGlob.x()); - 
mYoriginal->Fill(clusOriginalPointGlob.y()); - mZoriginal->Fill(clusOriginalPointGlob.z()); + /// if the track passes the cuts, fill the den and go ahead m2DClusterOriginalPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y()); m3DClusterPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y(), clusOriginalPointGlob.z()); + chi2trackAccepted->Fill(chi2); + denPt[layerOriginal]->Fill(pt); + denPhi[layerOriginal]->Fill(phiOriginal); + denEta[layerOriginal]->Fill(eta); + denRow[layerOriginal]->Fill(rowOriginal); + denCol[layerOriginal]->Fill(clusOriginal.getCol()); + denZ[layerOriginal]->Fill(clusOriginalPointGlob.z()); + nTracksSelected[layerOriginal]++; + mDenColEta[layerOriginal]->Fill(clusOriginal.getCol(), eta); + mDenRowPhi[layerOriginal]->Fill(clusOriginal.getRow(), clusOriginalPointGlob.z()); + mDenRowCol[layerOriginal]->Fill(clusOriginal.getRow(), clusOriginal.getCol()); + denLayers->Fill(layerOriginal); - /// applying the cuts on the phi of the original cluster - bool keepTrack = false; /// wether or not a cluster is found in an eligible track in the corresponding layer - - if (layerOriginal == 0) { - for (int i = 0; i < 10; i++) { - if ((phiOriginal >= mPhiCutsL0[i][0] && phiOriginal <= mPhiCutsL0[i][1])) { - keepTrack = true; - } - } - } - if (layerOriginal == 1) { - for (int i = 0; i < 12; i++) { - if ((phiOriginal >= mPhiCutsL1[i][0] && phiOriginal <= mPhiCutsL1[i][1])) { - keepTrack = true; - } - } - } - if (layerOriginal == 2) { - for (int i = 0; i < 17; i++) { - if ((phiOriginal >= mPhiCutsL2[i][0] && phiOriginal <= mPhiCutsL2[i][1])) { - keepTrack = true; - } - } - } - if (!(keepTrack)) { - continue; /// if the track (cluster) is not eligible for any layer, go to the next one - } else { /// fill the den and go ahead - chi2trackAccepted->Fill(chi2); - denPt[layerOriginal]->Fill(pt); - denPhi[layerOriginal]->Fill(phiOriginal); - denEta[layerOriginal]->Fill(eta); - nTracksSelected[layerOriginal]++; - } + /// if the cuts up to 
here are passed, then search for the duplicated cluster, otherwise go to the next cluster gsl::span labsOriginal = {}; if (isMC) { labsOriginal = mClustersMCLCont->getLabels(mInputITSidxs[iclTrack]); // get labels of the cluster associated to the track (original) @@ -2487,19 +2334,16 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) float etaDuplicated = -999.; float clusZ = -999.; - o2::MCCompLabel labelCandidateDuplicated; - bool duplExists = false; + o2::itsmft::CompClusterExt clusDuplicatedSelected = o2::itsmft::CompClusterExt(); - /// for each original cluster iterate over all the possible duplicated clusters to see first wether increment or not the denominator (if a track has a possible duplicated cluster in the selected phi region) - /// then if the phi is within the cuts, select the "adjacent" clusters (stave +-1, chip =,+-1) and calculate the DCA with the track. Then choose the closest one. + /// for each original cluster iterate over all the possible duplicated clusters to select the "adjacent" clusters (stave +-1, chip =,+-1) and calculate the DCA with the track. Then choose the closest one. 
for (unsigned int iClus = rofIndexClus; iClus < rofIndexClus + rofNEntriesClus; iClus++) { // iteration over ALL the clusters in the ROF auto clusDuplicated = mClusters[iClus]; - auto clusDuplicatedPoint = mITSClustersArray[iClus]; o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - phi = clusDuplicatedPointGlob.phi() * 180 / M_PI; + phi = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; //// applying constraints: the cluster should be on the same layer, should be on an adjacent stave and on the same or adjacent chip position if (clusDuplicated.getSensorID() == clusOriginal.getSensorID()) { @@ -2509,12 +2353,6 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) if (layerDuplicated != layerOriginal) { continue; } - labelCandidateDuplicated = mClustersMCLCont->getLabels(iClus)[0]; - if (labelCandidateDuplicated == tracklab) { - duplExists = true; - std::cout << "Duplicated should exist with label: " << labelCandidateDuplicated.asString() << " , phi = " << phi << " and be: "; - clusDuplicated.print(); - } auto staveDuplicated = mGeometry->getStave(clusDuplicated.getSensorID()); if (abs(staveDuplicated - staveOriginal) != 1) { continue; @@ -2524,8 +2362,6 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) continue; } - std::cout << "checks passed" << std::endl; - gsl::span labsDuplicated = {}; if (isMC) { labsDuplicated = mClustersMCLCont->getLabels(iClus); @@ -2538,13 +2374,8 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) continue; } - std::cout << "dca calculated: " << clusDuplicatedDCA[0] << " " << clusDuplicatedDCA[1] << std::endl; - DCAxyData[layerDuplicated]->Fill(clusDuplicatedDCA[0]); DCAzData[layerDuplicated]->Fill(clusDuplicatedDCA[1]); - DistanceClustersX[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.x() 
- clusOriginalPointGlob.x())); - DistanceClustersY[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.y() - clusOriginalPointGlob.y())); - DistanceClustersZ[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.z() - clusOriginalPointGlob.z())); // Imposing that the distance between the duplicated cluster and the track is less than x sigma if (!(clusDuplicatedDCA[0] > mDCACutsXY[layerDuplicated][0] && clusDuplicatedDCA[0] < mDCACutsXY[layerDuplicated][1] && clusDuplicatedDCA[1] > mDCACutsZ[layerDuplicated][0] && clusDuplicatedDCA[1] < mDCACutsZ[layerDuplicated][1])) { @@ -2552,15 +2383,9 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) DCAzRejected[layerDuplicated]->Fill(clusDuplicatedDCA[1]); continue; } + m2DClusterDuplicatedPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y()); m3DDuplicatedClusterPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y(), clusDuplicatedPointGlob.z()); - mXduplicated->Fill(clusDuplicatedPointGlob.x()); - mYduplicated->Fill(clusDuplicatedPointGlob.y()); - mZduplicated->Fill(clusDuplicatedPointGlob.z()); - - DistanceClustersXAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.x() - clusOriginalPointGlob.x())); - DistanceClustersYAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.y() - clusOriginalPointGlob.y())); - DistanceClustersZAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.z() - clusOriginalPointGlob.z())); if (mVerboseOutput) { LOGP(info, "Propagation ok"); @@ -2578,32 +2403,46 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) ptDuplicated = pt; etaDuplicated = eta; clusZ = clusOriginalPointGlob.z(); + clusDuplicatedSelected = clusDuplicated; } adjacentFound = 1; - std::cout << "Duplicated found with label: " << labsDuplicated[0] << " and phi: " << phiDuplicated << std::endl; - clusDuplicated.print(); - std::cout << "-----" << std::endl; } // end loop on all the clusters in the rof -> at this point we have the information on the closest 
cluster (if there is one) // here clusID_rDCA_label is updated with the closest cluster to the track other than the original one - // checking if it is a good or fake match looking at the labels (only if isMC) + if (!adjacentFound) { - if (duplExists) { - std::cout << "No duplicated found but should exist" << std::endl; - std::cout << "DCA cuts were: xy-> " << mDCACutsXY[layerOriginal][0] << " to " << mDCACutsXY[layerOriginal][1] << " and z-> " << mDCACutsZ[layerOriginal][0] << " to " << mDCACutsZ[layerOriginal][1] << "\n-----" << std::endl; - } else { - std::cout << "No duplicated found and does not exist" << std::endl; - } + radiusNotFound[layerOriginal]->Fill(sqrt(clusOriginalPointGlob.x() * clusOriginalPointGlob.x() + clusOriginalPointGlob.y() * clusOriginalPointGlob.y())); + colNotFound[layerOriginal]->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9))); + rowNotFound[layerOriginal]->Fill(rowOriginal); + zNotFound[layerOriginal]->Fill(clusOriginalPointGlob.z()); + phiNotFound[layerOriginal]->Fill(phiOriginal); continue; } - std::cout << "-----" << std::endl; + + chipOrigVsOverlap->Fill(clusOriginal.getChipID() % 9, clusDuplicatedSelected.getChipID() % 9); + mChipFound->Fill(clusOriginal.getChipID()); + zFound[layerOriginal]->Fill(clusOriginalPointGlob.z()); + radiusFound[layerOriginal]->Fill(sqrt(clusOriginalPointGlob.x() * clusOriginalPointGlob.x() + clusOriginalPointGlob.y() * clusOriginalPointGlob.y())); + colFoundOriginalVsDuplicated[layerOriginal]->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9)), clusDuplicatedSelected.getCol() + (1024 * (clusDuplicatedSelected.getChipID() % 9))); + colFoundOriginal[layerOriginal]->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9))); + m2DClusterFoundPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y()); + phiFound[layerOriginal]->Fill(phiOriginal); + rowFound[layerOriginal]->Fill(rowOriginal); nDuplClusters++; nDuplicatedClusters[layerOriginal]++; - 
numPt[layerOriginal]->Fill(ptDuplicated); + numPt[layerOriginal]->Fill(pt); numPhi[layerOriginal]->Fill(phiDuplicated); numEta[layerOriginal]->Fill(etaDuplicated); + numRow[layerOriginal]->Fill(rowOriginal); + numCol[layerOriginal]->Fill(clusOriginal.getCol()); + numZ[layerOriginal]->Fill(clusOriginalPointGlob.z()); mZvsPhiDUplicated[layerOriginal]->Fill(clusZ, phiDuplicated); + mNumColEta[layerOriginal]->Fill(clusOriginal.getCol(), eta); + mNumRowPhi[layerOriginal]->Fill(clusOriginal.getRow(), clusOriginalPointGlob.z()); + mNumRowCol[layerOriginal]->Fill(clusOriginal.getRow(), clusOriginal.getCol()); + numLayers->Fill(layerOriginal); + // checking if it is a good or fake match looking at the labels (only if isMC) if (isMC) { bool isGood = false; for (auto lab : std::get<2>(clusID_rDCA_label)) { @@ -2612,6 +2451,10 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) numPtGood[layerOriginal]->Fill(ptDuplicated); numPhiGood[layerOriginal]->Fill(phiDuplicated); numEtaGood[layerOriginal]->Fill(etaDuplicated); + numRowGood[layerOriginal]->Fill(rowOriginal); + numColGood[layerOriginal]->Fill(clusOriginal.getCol()); + numZGood[layerOriginal]->Fill(clusOriginalPointGlob.z()); + numGoodLayers->Fill(layerOriginal); continue; } } @@ -2619,6 +2462,10 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) numPtFake[layerOriginal]->Fill(ptDuplicated); numPhiFake[layerOriginal]->Fill(phiDuplicated); numEtaFake[layerOriginal]->Fill(etaDuplicated); + numRowFake[layerOriginal]->Fill(rowOriginal); + numColFake[layerOriginal]->Fill(clusOriginal.getCol()); + numZFake[layerOriginal]->Fill(clusOriginalPointGlob.z()); + numFakeLayers->Fill(layerOriginal); } } } // end loop on clusters associated to the track @@ -2642,16 +2489,14 @@ void EfficiencyStudy::process(o2::globaltracking::RecoContainer& recoData) if (mUseMC) { // getDCAClusterTrackMC(); - // studyDCAcutsMC(); + studyDCAcutsMC(); // studyClusterSelectionMC(); - // getEfficiencyAndTrackInfo(mUseMC); // 
countDuplicatedAfterCuts(); - } else if (!mUseMC) { - // saveDataInfo(); + getEfficiency(mUseMC); + } else { + getEfficiency(mUseMC); } - getEfficiency(mUseMC); - LOGP(info, "** Found in {} rofs:\n\t- {} clusters\n\t", mClustersROFRecords.size(), mClusters.size()); @@ -2681,21 +2526,13 @@ void EfficiencyStudy::endOfStream(EndOfStreamContext& ec) mOutFile->mkdir("EfficiencyFinal/"); mOutFile->mkdir("DCAFinal/"); + mOutFile->mkdir("NotFoundChecks/"); - mOutFile->mkdir("DistanceClusters/"); mOutFile->mkdir("DCA/"); mOutFile->mkdir("Pt_Eta_Phi/"); if (mUseMC) { - mOutFile->cd("DistanceClusters"); - for (int i = 0; i < NLAYERS; i++) { - mDistanceClustersX[i]->Write(); - mDistanceClustersY[i]->Write(); - mDistanceClustersZ[i]->Write(); - mDistanceClusters[i]->Write(); - } - mOutFile->cd("DCA"); mDCAxyDuplicated->Write(); mDCAzDuplicated->Write(); @@ -2709,24 +2546,19 @@ void EfficiencyStudy::endOfStream(EndOfStreamContext& ec) mOutFile->cd("Pt_Eta_Phi/"); for (int i = 0; i < NLAYERS; i++) { - mPhiOriginal[i]->Write(); - mPhiTrackOriginal[i]->Write(); mDuplicatedPhiAllPt[i]->Write(); - mPtOriginal[i]->Write(); mPtDuplicated[i]->Write(); mEtaDuplicated[i]->Write(); mPhiDuplicated[i]->Write(); - mPhiTrackDuplicated[i]->Write(); - mPhiTrackDuplicatedvsphiDuplicated[i]->Write(); - mPhiTrackoriginalvsphioriginal[i]->Write(); mPhiOriginalIfDuplicated[i]->Write(); mDuplicatedPt[i]->Write(); mDuplicatedPtEta[i]->Write(); mDuplicatedPtPhi[i]->Write(); mDuplicatedEtaPhi[i]->Write(); - mEtaOriginal[i]->Write(); mDuplicatedEtaAllPt[i]->Write(); mDuplicatedRow[i]->Write(); + mDuplicatedCol[i]->Write(); + mDuplicatedZ[i]->Write(); for (int p = 0; p < 3; p++) { mDuplicatedEta[i][p]->Write(); @@ -2741,7 +2573,6 @@ void EfficiencyStudy::endOfStream(EndOfStreamContext& ec) IPOriginalxy[i]->Write(); IPOriginalz[i]->Write(); mPhiOriginal[i]->Write(); - mPhiTrackOriginal[i]->Write(); mPtOriginal[i]->Write(); mEtaOriginal[i]->Write(); mZvsPhiDUplicated[i]->Write(); @@ -2752,75 +2583,245 @@ 
void EfficiencyStudy::endOfStream(EndOfStreamContext& ec) mOutFile->mkdir("chi2"); mOutFile->cd("chi2/"); - chi2track->Write(); chi2trackAccepted->Write(); mOutFile->cd("EfficiencyFinal/"); + TList listNum; + TList listDen; + auto numPhiAll = std::unique_ptr((TH1D*)numPhi[0]->Clone("numPhiAll")); + auto denPhiAll = std::unique_ptr((TH1D*)denPhi[0]->Clone("denPhiAll")); + + TList listNumColEta; + TList listDenColEta; + auto numColEtaAll = std::unique_ptr((TH1D*)mNumColEta[0]->Clone("numColEtaAll")); + auto denColEtaAll = std::unique_ptr((TH1D*)mDenColEta[0]->Clone("denColEtaAll")); + + TList listNumRowPhi; + TList listDenRowPhi; + auto numRowPhiAll = std::unique_ptr((TH1D*)mNumRowPhi[0]->Clone("numRowPhiAll")); + auto denRowPhiAll = std::unique_ptr((TH1D*)mDenRowPhi[0]->Clone("denRowPhiAll")); + + TList listNumRowCol; + TList listDenRowCol; + auto numRowColAll = std::unique_ptr((TH1D*)mNumRowCol[0]->Clone("numRowColAll")); + auto denRowColAll = std::unique_ptr((TH1D*)mDenRowCol[0]->Clone("denRowColAll")); + + std::unique_ptr effLayers = std::make_unique(*numLayers, *denLayers); + effLayers->SetName("effLayers"); + effLayers->SetTitle("; ;Efficiency"); + std::unique_ptr effLayersGood = std::make_unique(*numGoodLayers, *denLayers); + effLayersGood->SetName("effLayersGood"); + effLayersGood->SetTitle("; ;Efficiency Good Matches"); + std::unique_ptr effLayersFake = std::make_unique(*numFakeLayers, *denLayers); + effLayersFake->SetName("effLayersFake"); + effLayersFake->SetTitle("; ;Efficiency Fake Matches"); + effLayers->Write(); + effLayersGood->Write(); + effLayersFake->Write(); + denLayers->Write(); + numLayers->Write(); + numGoodLayers->Write(); + numFakeLayers->Write(); for (int l = 0; l < NLAYERS; l++) { - TEfficiency* effPt = new TEfficiency(*numPt[l], *denPt[l]); + std::unique_ptr effPt = std::make_unique(*numPt[l], *denPt[l]); effPt->SetName(Form("effPt_layer%d", l)); effPt->SetTitle(Form("L%d;p_{T} (GeV/c);Efficiency", l)); - TEfficiency* effPtGood = new 
TEfficiency(*numPtGood[l], *denPt[l]); + std::unique_ptr effPtGood = std::make_unique(*numPtGood[l], *denPt[l]); effPtGood->SetName(Form("effPtGood_layer%d", l)); effPtGood->SetTitle(Form("L%d;p_{T} (GeV/c);Efficiency Good Matches", l)); - TEfficiency* effPtFake = new TEfficiency(*numPtFake[l], *denPt[l]); + std::unique_ptr effPtFake = std::make_unique(*numPtFake[l], *denPt[l]); effPtFake->SetName(Form("effPtFake_layer%d", l)); effPtFake->SetTitle(Form("L%d;p_{T} (GeV/c);Efficiency Fake Matches", l)); effPt->Write(); effPtGood->Write(); effPtFake->Write(); - TEfficiency* effPhi = new TEfficiency(*numPhi[l], *denPhi[l]); + std::unique_ptr effPhi = std::make_unique(*numPhi[l], *denPhi[l]); effPhi->SetName(Form("effPhi_layer%d", l)); effPhi->SetTitle(Form("L%d;#phi;Efficiency", l)); - TEfficiency* effPhiGood = new TEfficiency(*numPhiGood[l], *denPhi[l]); + std::unique_ptr effPhiGood = std::make_unique(*numPhiGood[l], *denPhi[l]); effPhiGood->SetName(Form("effPhiGood_layer%d", l)); effPhiGood->SetTitle(Form("L%d;#phi;Efficiency Good Matches", l)); - TEfficiency* effPhiFake = new TEfficiency(*numPhiFake[l], *denPhi[l]); + std::unique_ptr effPhiFake = std::make_unique(*numPhiFake[l], *denPhi[l]); effPhiFake->SetName(Form("effPhiFake_layer%d", l)); effPhiFake->SetTitle(Form("L%d;#phi;Efficiency Fake Matches", l)); effPhi->Write(); effPhiGood->Write(); effPhiFake->Write(); + listNum.Add(numPhi[l].get()); + listDen.Add(denPhi[l].get()); - TEfficiency* effEta = new TEfficiency(*numEta[l], *denEta[l]); + std::unique_ptr effEta = std::make_unique(*numEta[l], *denEta[l]); effEta->SetName(Form("effEta_layer%d", l)); effEta->SetTitle(Form("L%d;#eta;Efficiency", l)); - TEfficiency* effEtaGood = new TEfficiency(*numEtaGood[l], *denEta[l]); + std::unique_ptr effEtaGood = std::make_unique(*numEtaGood[l], *denEta[l]); effEtaGood->SetName(Form("effEtaGood_layer%d", l)); effEtaGood->SetTitle(Form("L%d;#eta;Efficiency Good Matches", l)); - TEfficiency* effEtaFake = new 
TEfficiency(*numEtaFake[l], *denEta[l]); + std::unique_ptr effEtaFake = std::make_unique(*numEtaFake[l], *denEta[l]); effEtaFake->SetName(Form("effEtaFake_layer%d", l)); effEtaFake->SetTitle(Form("L%d;#eta;Efficiency Fake Matches", l)); effEta->Write(); effEtaGood->Write(); effEtaFake->Write(); + std::unique_ptr effRow = std::make_unique(*numRow[l], *denRow[l]); + effRow->SetName(Form("effRow_layer%d", l)); + effRow->SetTitle(Form("L%d;#Row;Efficiency", l)); + std::unique_ptr effRowGood = std::make_unique(*numRowGood[l], *denRow[l]); + effRowGood->SetName(Form("effRowGood_layer%d", l)); + effRowGood->SetTitle(Form("L%d;#Row;Efficiency Good Matches", l)); + std::unique_ptr effRowFake = std::make_unique(*numRowFake[l], *denRow[l]); + effRowFake->SetName(Form("effRowFake_layer%d", l)); + effRowFake->SetTitle(Form("L%d;#Row;Efficiency Fake Matches", l)); + effRow->Write(); + effRowGood->Write(); + effRowFake->Write(); + + std::unique_ptr effCol = std::make_unique(*numCol[l], *denCol[l]); + effCol->SetName(Form("effCol_layer%d", l)); + effCol->SetTitle(Form("L%d;#Col;Efficiency", l)); + std::unique_ptr effColGood = std::make_unique(*numColGood[l], *denCol[l]); + effColGood->SetName(Form("effColGood_layer%d", l)); + effColGood->SetTitle(Form("L%d;#Col;Efficiency Good Matches", l)); + std::unique_ptr effColFake = std::make_unique(*numColFake[l], *denCol[l]); + effColFake->SetName(Form("effColFake_layer%d", l)); + effColFake->SetTitle(Form("L%d;#Col;Efficiency Fake Matches", l)); + effCol->Write(); + effColGood->Write(); + effColFake->Write(); + + std::unique_ptr effZ = std::make_unique(*numZ[l], *denZ[l]); + effZ->SetName(Form("effZ_layer%d", l)); + effZ->SetTitle(Form("L%d;#Z (cm);Efficiency", l)); + std::unique_ptr effZGood = std::make_unique(*numZGood[l], *denZ[l]); + effZGood->SetName(Form("effZGood_layer%d", l)); + effZGood->SetTitle(Form("L%d;#Z (cm);Efficiency Good Matches", l)); + std::unique_ptr effZFake = std::make_unique(*numZFake[l], *denZ[l]); + 
effZFake->SetName(Form("effZFake_layer%d", l)); + effZFake->SetTitle(Form("L%d;#Z (cm);Efficiency Fake Matches", l)); + effZ->Write(); + effZGood->Write(); + effZFake->Write(); + + std::unique_ptr effColEta = std::make_unique(*mNumColEta[l], *mDenColEta[l]); + effColEta->SetName(Form("effColEta_layer%d", l)); + effColEta->SetTitle(Form("L%d;Column;#eta", l)); + effColEta->Write(); + + listNumColEta.Add(mNumColEta[l].get()); + listDenColEta.Add(mDenColEta[l].get()); + + std::unique_ptr effRowPhi = std::make_unique(*mNumRowPhi[l], *mDenRowPhi[l]); + effRowPhi->SetName(Form("effRowPhi_layer%d", l)); + effRowPhi->SetTitle(Form("L%d;Column;#eta", l)); + effRowPhi->Write(); + + listNumRowPhi.Add(mNumRowPhi[l].get()); + listDenRowPhi.Add(mDenRowPhi[l].get()); + + std::unique_ptr effRowCol = std::make_unique(*mNumRowCol[l], *mDenRowCol[l]); + effRowCol->SetName(Form("effRowCol_layer%d", l)); + effRowCol->SetTitle(Form("L%d;Column;#eta", l)); + effRowCol->Write(); + + listNumRowCol.Add(mNumRowCol[l].get()); + listDenRowCol.Add(mDenRowCol[l].get()); + + mNumRowCol[l]->Write(); + mDenRowCol[l]->Write(); + mNumRowPhi[l]->Write(); + mDenRowPhi[l]->Write(); + mNumColEta[l]->Write(); + mDenColEta[l]->Write(); numPhi[l]->Write(); denPhi[l]->Write(); numPt[l]->Write(); denPt[l]->Write(); numEta[l]->Write(); denEta[l]->Write(); + numRow[l]->Write(); + denRow[l]->Write(); + numCol[l]->Write(); + denCol[l]->Write(); + numZ[l]->Write(); + denZ[l]->Write(); } + numPhiAll->Merge(&listNum); + denPhiAll->Merge(&listDen); + + numColEtaAll->Merge(&listNumColEta); + denColEtaAll->Merge(&listDenColEta); + + numRowPhiAll->Merge(&listNumRowPhi); + denRowPhiAll->Merge(&listDenRowPhi); + + numRowColAll->Merge(&listNumRowCol); + denRowColAll->Merge(&listDenRowCol); + + std::unique_ptr effPhiAll = std::make_unique(*numPhiAll, *denPhiAll); + effPhiAll->SetName("effPhi_AllLayers"); + effPhiAll->SetTitle("L0 + L1 + L2;#phi;Efficiency"); + effPhiAll->Write(); + numPhiAll->Write(); + denPhiAll->Write(); 
+ + std::unique_ptr effColEtaAll = std::make_unique(*numColEtaAll, *denColEtaAll); + effColEtaAll->SetName("effColEta_AllLayers"); + effColEtaAll->SetTitle("L0 + L1 + L2;Column;#eta"); + effColEtaAll->Write(); + numColEtaAll->Write(); + denColEtaAll->Write(); + + std::unique_ptr effRowPhiAll = std::make_unique(*numRowPhiAll, *denRowPhiAll); + effRowPhiAll->SetName("effRowPhi_AllLayers"); + effRowPhiAll->SetTitle("L0 + L1 + L2;Column;#eta"); + effRowPhiAll->Write(); + numRowPhiAll->Write(); + denRowPhiAll->Write(); + + std::unique_ptr effRowColAll = std::make_unique(*numRowColAll, *denRowColAll); + effRowColAll->SetName("effRowCol_AllLayers"); + effRowColAll->SetTitle("L0 + L1 + L2;Column;#eta"); + effRowColAll->Write(); + numRowColAll->Write(); + denRowColAll->Write(); mOutFile->cd("DCAFinal/"); for (int l = 0; l < NLAYERS; l++) { DCAxyData[l]->Write(); DCAzData[l]->Write(); - DistanceClustersX[l]->Write(); - DistanceClustersY[l]->Write(); - DistanceClustersZ[l]->Write(); - DistanceClustersXAftercuts[l]->Write(); - DistanceClustersYAftercuts[l]->Write(); - DistanceClustersZAftercuts[l]->Write(); DCAxyRejected[l]->Write(); DCAzRejected[l]->Write(); } + mOutFile->cd("NotFoundChecks/"); + + for (int l = 0; l < NLAYERS; l++) { + phiFound[l]->Write(); + phiNotFound[l]->Write(); + rowFound[l]->Write(); + rowNotFound[l]->Write(); + zFound[l]->Write(); + zNotFound[l]->Write(); + radiusFound[l]->Write(); + radiusNotFound[l]->Write(); + colFoundOriginalVsDuplicated[l]->Write(); + colFoundOriginal[l]->Write(); + colNotFound[l]->Write(); + } + mChipFound->Write(); + mChipNotFound->Write(); + m2DClusterFoundPositions->Write(); + l0_00->Write(); + l1_15->Write(); + l2_19->Write(); + chipOrigVsOverlap->Write(); + chipmap->SetContour(100); + chipmap->Write(); + mOutFile->Close(); } From d9d78bcededce73fe8ef2e452d81dff9f8c797d2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 09:28:41 +0100 Subject: [PATCH 0149/1764] GPU CMake: Cleanup and do not use Clang x86 
denormals flag for device compilation --- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 4 +- GPU/GPUTracking/CMakeLists.txt | 5 +- dependencies/FindO2GPU.cmake | 55 +++++++++++----------- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 48f292a198b9c..381c9c050ca09 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -23,9 +23,9 @@ endif() set(CL_SRC ${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl) set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode) -set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021) +set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS}) if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) - set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) + set(OCL_FLAGS ${OCL_FLAGS} -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() set(OCL_FLAGS ${OCL_FLAGS} ${GPUCA_OCL_NO_FAST_MATH_FLAGS}) endif() diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index ad7dd9c210cd1..39218e9f94527 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -17,11 +17,12 @@ set(MODULE GPUTracking) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2 ${GPUCA_CLANG_FTZ}") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") endif() elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - 
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math ${GPUCA_CLANG_FTZ}") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() +set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}") include(cmake/helpers.cmake) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 56b53e1be8879..d796d0b7145a8 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -85,17 +85,20 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$") message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}") endif() if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") - set(GPUCA_CLANG_FTZ "") + set(GPUCA_CXX_DENORMALS_FLAGS "") else() - set(GPUCA_CLANG_FTZ "-mdaz-ftz") + set(GPUCA_CXX_DENORMALS_FLAGS "-mdaz-ftz") endif() -set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off ${GPUCA_CLANG_FTZ}") -set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=true --prec-div=true --prec-sqrt=true --fmad false") -set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt -cl-denorms-are-zero) +set(GPUCA_CUDA_DENORMALS_FLAGS "--ftz=true") +set(GPUCA_OCL_DENORMALS_FLAGS "-cl-denorms-are-zero") +set(GPUCA_HIP_DENORMALS_FLAGS "-fgpu-flush-denormals-to-zero") +set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") +set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--prec-div=true --prec-sqrt=true --fmad false") +set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt ) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2}) add_definitions(-DGPUCA_DETERMINISTIC_MODE) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") - set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} 
"${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + string(APPEND CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + string(APPEND CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") endif() @@ -143,7 +146,8 @@ if(ENABLE_CUDA) endif() endif() if(CMAKE_CUDA_COMPILER) - set(CMAKE_CUDA_FLAGS "-Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes") + set(CMAKE_CUDA_FLAGS "-Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes ${GPUCA_CUDA_DENORMALS_FLAGS}") + set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \"${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE) string(APPEND CMAKE_CUDA_FLAGS " -Xptxas -v") endif() @@ -151,19 +155,18 @@ if(ENABLE_CUDA) if (NOT ENABLE_CUDA STREQUAL "AUTO") string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler") endif() - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \"${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -lineinfo -Xptxas -O0 -Xcompiler -O0") + string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -lineinfo -Xptxas -O0") else() - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4") + string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -Xptxas -O4 -Xcompiler -O4") endif() if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") + string(APPEND 
CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -use_fast_math --ftz=true")# + string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math --ftz=true") endif() if(CMAKE_CXX_FLAGS MATCHES "(^| )-Werror( |$)") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call") + string(APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call") endif() if(GPUCA_CUDA_GCCBIN) list(FILTER CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES EXCLUDE REGEX "^/usr/lib.*/gcc/") # Workaround, since CMake adds old GCC lib paths implicitly if we request that gcc for CUDA @@ -285,27 +288,25 @@ if(ENABLE_HIP) set(HIP_ENABLED ON) set_target_properties(roc::rocthrust PROPERTIES IMPORTED_GLOBAL TRUE) message(STATUS "HIP Found (${hip_HIPCC_EXECUTABLE} version ${hip_VERSION})") - set(O2_HIP_CMAKE_CXX_FLAGS "-fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed") + set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS}") + set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") + string(APPEND CMAKE_HIP_FLAGS " -fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed ") if(hip_VERSION VERSION_GREATER_EQUAL "6.0" AND NOT hip_VERSION VERSION_GREATER_EQUAL "6.2") - string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # 
TODO: Cleanup + string(APPEND CMAKE_HIP_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # TODO: Cleanup endif() if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE) - string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -Rpass-analysis=kernel-resource-usage") + string(APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage") + endif() + if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) + string(APPEND CMAKE_HIP_FLAGS " -ffast-math") + endif() + if(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") + string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -O3") endif() string(REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}") # ROCm currently doesn’t support integrated graphics if(HIP_AMDGPUTARGET) set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection endif() - if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) - string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -fgpu-flush-denormals-to-zero -ffast-math") - endif() - set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS}") - set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") - if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0 -ggdb") - else() - set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3") - endif() else() set(HIP_ENABLED OFF) endif() From d1fcd960548af1fbc975638e8b08abb44482d2dd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 09:29:05 +0100 Subject: [PATCH 0150/1764] GPU HIP CMake: Remove deprecated option --- dependencies/FindO2GPU.cmake | 3 --- 1 file changed, 3 deletions(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index d796d0b7145a8..bbbb420354fae 
100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -291,9 +291,6 @@ if(ENABLE_HIP) set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS}") set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") string(APPEND CMAKE_HIP_FLAGS " -fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed ") - if(hip_VERSION VERSION_GREATER_EQUAL "6.0" AND NOT hip_VERSION VERSION_GREATER_EQUAL "6.2") - string(APPEND CMAKE_HIP_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # TODO: Cleanup - endif() if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE) string(APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage") endif() From 210dc61adc0b14ee8e4ae62916d442dd02ed3f50 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 27 Mar 2025 15:57:38 +0100 Subject: [PATCH 0151/1764] DPL: make input slots display in DebugGUI scrollable (#14041) --- .../Core/include/Framework/DeviceControl.h | 2 + .../src/FrameworkGUIDataRelayerUsage.cxx | 95 ++++++++++++++----- .../src/FrameworkGUIDataRelayerUsage.h | 5 +- .../src/FrameworkGUIDevicesGraph.cxx | 3 +- 4 files changed, 79 insertions(+), 26 deletions(-) diff --git a/Framework/Core/include/Framework/DeviceControl.h b/Framework/Core/include/Framework/DeviceControl.h index ce946e8e77fbf..03889c00f6cf9 100644 --- a/Framework/Core/include/Framework/DeviceControl.h +++ b/Framework/Core/include/Framework/DeviceControl.h @@ -52,6 +52,8 @@ struct DeviceControl { int logStreams = 0; /// An incremental number to identify the device state int requestedState = 0; + /// The first window in the records buffer to display in GUI + int firstWnd = 1; }; } // namespace o2::framework diff --git 
a/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.cxx b/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.cxx index 8e683d46131ea..c39e268fa90a7 100644 --- a/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.cxx +++ b/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.cxx @@ -17,8 +17,7 @@ #include "Framework/DataProcessingStates.h" #include "InspectorHelpers.h" #include "PaletteHelpers.h" -#include "Framework/Logger.h" -#include +#include "FrameworkGUIDataRelayerUsage.h" #include #include @@ -27,11 +26,11 @@ static inline ImVec2 operator-(const ImVec2& lhs, const ImVec2& rhs) { return Im namespace o2::framework::gui { - // This is to display the information in the data relayer struct HeatMapHelper { template - static void draw(const char* name, + static void draw(const char* /*name*/, + int& v, ImVec2 const& sizeHint, std::function const& getNumInputs, std::function const& getNumRecords, @@ -42,21 +41,69 @@ struct HeatMapHelper { std::function const& getColor, std::function const& describeCell) { - ImVec2 size = ImVec2(sizeHint.x, std::min(sizeHint.y, 16.f * getNumItems(0) + 2)); - ImU32 BORDER_COLOR = ImColor(200, 200, 200, 255); - ImU32 BACKGROUND_COLOR = ImColor(20, 20, 20, 255); + float padding = 1; + // add slider to scroll between the grid display windows + size_t nw = getNumRecords() / WND; + ImGui::PushItemWidth(sizeHint.x); + ImGui::SliderInt("##window", &v, 1, nw, "wnd: %d", ImGuiSliderFlags_AlwaysClamp); + ImVec2 sliderMin = ImGui::GetItemRectMin(); + constexpr float MAX_BOX_X_SIZE = 16.f; constexpr float MAX_BOX_Y_SIZE = 16.f; + + ImVec2 size = ImVec2(sizeHint.x, std::min(sizeHint.y, MAX_BOX_Y_SIZE * getNumItems(0) + 2)); + ImU32 BORDER_COLOR = ImColor(200, 200, 200, 255); + ImU32 BACKGROUND_COLOR = ImColor(20, 20, 20, 255); + ImU32 BORDER_COLOR_A = ImColor(200, 200, 200, 0); + ImU32 BACKGROUND_COLOR_A = ImColor(0, 0, 0, 0); + ImDrawList* drawList = ImGui::GetWindowDrawList(); - ImVec2 winPos = ImGui::GetCursorScreenPos() + ImVec2{0, 
7}; - auto records = getNumRecords(); - auto boxSizeX = std::min(size.x / records, MAX_BOX_X_SIZE); - auto numInputs = getNumInputs(); + ImVec2 winPos = sliderMin; + // overlay activity indicator on the slider + auto xsz = size.x / nw; + drawList->AddRectFilled( + ImVec2{0., 0.} + winPos, + ImVec2{size.x, size.y} + winPos, + BACKGROUND_COLOR_A); + drawList->AddRect( + ImVec2{0. - 1, -1} + winPos, + ImVec2{size.x + 1, size.y - 1} + winPos, + BORDER_COLOR_A); + + const static auto colorA = ImColor(ImVec4{0.945, 0.096, 0.278, 0.5}); + const static auto colorE = ImColor(ImVec4{0, 0, 0, 0}); + + drawList->PrimReserve(nw * 6, nw * 4); + for (size_t iw = 0; iw < nw; ++iw) { + ImVec2 xOffset{iw * xsz + 2 * padding, 0}; + ImVec2 xSize{xsz - 2 * padding, 0}; + ImVec2 yOffset{0, 2 * padding}; + ImVec2 ySize{0, 16 - 4 * padding}; + bool active = 0; + for (size_t ir = iw; ir < ((iw + WND > getNumRecords()) ? getNumRecords() : iw + WND); ++ir) { + for (size_t i = 0; i < getNumItems(ir); ++i) { + active = getValue(*getItem(ir, i)) > 0; + if (active) { + break; + } + } + } + drawList->PrimRect( + xOffset + yOffset + winPos, + xOffset + xSize + yOffset + ySize + winPos, + active ? colorA : colorE); + } + + // display the grid + size_t recordsWindow = v * WND; + auto boxSizeX = std::min(size.x / WND, MAX_BOX_X_SIZE); + auto numInputs = getNumInputs(); + winPos = ImGui::GetCursorScreenPos() + ImVec2{0, 7}; ImGui::InvisibleButton("sensible area", ImVec2(size.x, size.y)); if (ImGui::IsItemHovered()) { auto pos = ImGui::GetMousePos() - winPos; - auto slot = std::lround(std::trunc(pos.x / size.x * records)); + auto slot = (v - 1) * WND + std::lround(std::trunc(pos.x / size.x * WND)); auto row = std::lround(std::trunc(pos.y / size.y * numInputs)); describeCell(row, slot); } @@ -69,21 +116,21 @@ struct HeatMapHelper { ImVec2(0. 
- 1, -1) + winPos, ImVec2{size.x + 1, size.y - 1} + winPos, BORDER_COLOR); - float padding = 1; size_t totalRects = 0; - for (size_t ri = 0, re = getNumRecords(); ri < re; ri++) { + for (size_t ri = (v - 1) * WND; ri < recordsWindow; ri++) { auto record = getRecord(ri); totalRects += getNumItems(record); } drawList->PrimReserve(totalRects * 6, totalRects * 4); - for (size_t ri = 0, re = getNumRecords(); ri < re; ri++) { + for (size_t ri = (v - 1) * WND; ri < recordsWindow; ri++) { auto record = getRecord(ri); - ImVec2 xOffset{(ri * boxSizeX) + padding, 0}; + ImVec2 xOffset{((ri - (v - 1) * WND) * boxSizeX) + padding, 0}; ImVec2 xSize{boxSizeX - 2 * padding, 0}; - auto boxSizeY = std::min(size.y / getNumItems(record), MAX_BOX_Y_SIZE); - for (size_t mi = 0, me = getNumItems(record); mi < me; mi++) { + auto me = getNumItems(record); + auto boxSizeY = std::min(size.y / me, MAX_BOX_Y_SIZE); + for (size_t mi = 0; mi < me; mi++) { ImVec2 yOffSet{0, (mi * boxSizeY) + padding}; ImVec2 ySize{0, boxSizeY - 2 * padding}; @@ -98,11 +145,12 @@ struct HeatMapHelper { } }; -void displayDataRelayer(DeviceMetricsInfo const& metrics, - DeviceInfo const& info, +void displayDataRelayer(DeviceMetricsInfo const& /*metrics*/, + DeviceInfo const& /*info*/, DeviceSpec const& spec, DataProcessingStates const& states, - ImVec2 const& size) + ImVec2 const& size, + int& v) { auto getNumInputs = [&states]() -> size_t { auto& inputsView = states.statesViews[(int)ProcessingStateId::DATA_QUERIES]; @@ -146,7 +194,7 @@ void displayDataRelayer(DeviceMetricsInfo const& metrics, } char const* const beginData = strchr(buffer + view.first, ' ') + 1; // Protect against buffer overflows - if (view.size <= beginData - buffer + i - view.first) { + if ((size_t)view.size <= beginData - buffer + i - view.first) { return &error; } return (int8_t const*)beginData + i; }; @@ -184,7 +232,7 @@ void displayDataRelayer(DeviceMetricsInfo const& metrics, if ((end - input) == 0) { continue; } - if (i == row) { + if (i == 
(size_t)row) { ImGui::Text("%d %.*s (%s)", row, int(end - input), input, InspectorHelpers::getLifeTimeStr(spec.inputs[i].matcher.lifetime).c_str()); break; } @@ -226,6 +274,7 @@ void displayDataRelayer(DeviceMetricsInfo const& metrics, if (getNumRecords()) { HeatMapHelper::draw("DataRelayer", + v, size, getNumInputs, getNumRecords, diff --git a/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.h b/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.h index 8c4941474d8a7..8bea06829f0dc 100644 --- a/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.h +++ b/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.h @@ -9,6 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. +#include "Framework/DeviceSpec.h" class ImVec2; namespace o2::framework @@ -19,9 +20,9 @@ class DataProcessingStates; namespace gui { - +static constexpr int WND = 16; /// View of the DataRelayer metrics for a given DeviceInfo -void displayDataRelayer(DeviceMetricsInfo const& metrics, DeviceInfo const& info, DeviceSpec const& spec, DataProcessingStates const&, ImVec2 const& size); +void displayDataRelayer(DeviceMetricsInfo const& metrics, DeviceInfo const& info, DeviceSpec const& spec, DataProcessingStates const&, ImVec2 const& size, int& v); } // namespace gui } // namespace o2::framework diff --git a/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx b/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx index 89126cf303a66..1c4ddd7e6aabf 100644 --- a/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx +++ b/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx @@ -713,7 +713,8 @@ void showTopologyNodeGraph(WorkspaceGUIState& state, default: break; } - gui::displayDataRelayer(metricsInfos[node->ID], infos[node->ID], specs[node->ID], allStates[node->ID], ImVec2(140., 90.)); + + gui::displayDataRelayer(metricsInfos[node->ID], infos[node->ID], specs[node->ID], allStates[node->ID], ImVec2(200., 160.), 
controls[node->ID].firstWnd); ImGui::EndGroup(); // Save the size of what we have emitted and whether any of the widgets are being used From 1c4e4a6be81b8b9762db927ebfdf5c636cda23ed Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:51:56 +0100 Subject: [PATCH 0152/1764] DPL: allow searching for plugins in executables as well (#14118) --- Framework/Core/include/Framework/PluginManager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/PluginManager.h b/Framework/Core/include/Framework/PluginManager.h index d6b16f01ad713..96281942e667d 100644 --- a/Framework/Core/include/Framework/PluginManager.h +++ b/Framework/Core/include/Framework/PluginManager.h @@ -87,7 +87,7 @@ struct PluginManager { #else auto libraryName = fmt::format("lib{}.so", loadablePlugin.library); #endif - auto ret = uv_dlopen(libraryName.c_str(), &handle); + auto ret = uv_dlopen(loadablePlugin.library.empty() ? nullptr : libraryName.c_str(), &handle); if (ret != 0) { LOGP(error, "Could not load library {}", loadablePlugin.library); LOG(error) << uv_dlerror(&handle); From 243279af0b17985227f1f3d0aa80794c1549122b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 11:12:37 +0100 Subject: [PATCH 0153/1764] GPU Standalone: unify duplicate denormal code in standalone benchmark --- .../Standalone/Benchmark/standalone.cxx | 22 ---------- GPU/GPUTracking/utils/qmaths_helpers.h | 41 +++++++++++-------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index e9cb7c5179c59..2e3e4725bd6aa 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -53,9 +53,6 @@ #include "GPUTPCGMMergedTrack.h" #include "GPUSettings.h" #include -#if not(defined(__ARM_NEON) or defined(__aarch64__)) // ARM doesn't have SSE 
-#include -#endif #include "GPUO2DataTypes.h" #include "GPUChainITS.h" @@ -85,23 +82,6 @@ std::atomic nIteration, nIterationEnd; std::vector ioPtrEvents; std::vector ioMemEvents; -void SetCPUAndOSSettings() -{ -#if not(defined(__ARM_NEON) or defined(__aarch64__)) // ARM doesn't have SSE -#ifdef FE_DFL_DISABLE_SSE_DENORMS_ENV // Flush and load denormals to zero in any case - fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV); -#else -#ifndef _MM_FLUSH_ZERO_ON -#define _MM_FLUSH_ZERO_ON 0x8000 -#endif -#ifndef _MM_DENORMALS_ZERO_ON -#define _MM_DENORMALS_ZERO_ON 0x0040 -#endif - _mm_setcsr(_mm_getcsr() | (_MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON)); -#endif -#endif // ARM -} - int32_t ReadConfiguration(int argc, char** argv) { int32_t qcRet = qConfigParse(argc, (const char**)argv); @@ -740,8 +720,6 @@ int32_t main(int argc, char** argv) { std::unique_ptr recUnique, recUniqueAsync, recUniquePipeline; - SetCPUAndOSSettings(); - if (ReadConfiguration(argc, argv)) { return 1; } diff --git a/GPU/GPUTracking/utils/qmaths_helpers.h b/GPU/GPUTracking/utils/qmaths_helpers.h index 9c5f704180aaa..5eb3ce4fb2483 100644 --- a/GPU/GPUTracking/utils/qmaths_helpers.h +++ b/GPU/GPUTracking/utils/qmaths_helpers.h @@ -15,24 +15,33 @@ #ifndef QMATH_HELPERS_H #define QMATH_HELPERS_H -#if defined __has_include -#if __has_include() && __has_include() -#include -#include -#if defined(_MM_FLUSH_ZERO_OFF) && defined(_MM_DENORMALS_ZERO_ON) +#if !(defined(__ARM_NEON) || defined(__aarch64__)) && __has_include() // clang-format off + #include + #if __has_include() + #include + #endif +#elif __has_include() + #include +#endif + static void disable_denormals() { - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#if !(defined(__ARM_NEON) || defined(__aarch64__)) && __has_include() // clang-format off + #if defined(_MM_FLUSH_ZERO_OFF) && defined(_MM_DENORMALS_ZERO_ON) + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + 
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + #else + #ifndef _MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_ON 0x8000 + #endif + #ifndef _MM_DENORMALS_ZERO_ON + #define _MM_DENORMALS_ZERO_ON 0x0040 + #endif + _mm_setcsr(_mm_getcsr() | (_MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON)); + #endif +#elif __has_include() && defined(FE_DFL_DISABLE_SSE_DENORMS_ENV) + fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV); +#endif // clang-format on } -#define XMM_HAS_DENORMAL_DEACTIVATE -#endif -#endif -#endif -#ifdef XMM_HAS_DENORMAL_DEACTIVATE -#undef XMM_HAS_DENORMAL_DEACTIVATE -#else -static void disable_denormals() {} -#endif #endif From 6d54cfc154ca9370a30dc5f212e9e40fd972fde1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 22:18:27 +0100 Subject: [PATCH 0154/1764] GPU: Make launch bounds of GPUDefParamters available as C++ struct at runtime --- GPU/Common/GPUCommonDef.h | 2 +- GPU/GPUTracking/Base/GPUConstantMem.h | 4 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 22 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 7 +- .../Base/GPUReconstructionKernelMacros.h | 27 +- .../Base/GPUReconstructionKernels.h | 12 +- .../Base/GPUReconstructionProcessing.cxx | 24 ++ .../Base/GPUReconstructionProcessing.h | 9 +- .../Base/cuda/GPUReconstructionCUDA.cu | 17 +- .../Base/cuda/GPUReconstructionCUDA.h | 2 - .../Base/cuda/GPUReconstructionCUDAKernels.cu | 5 +- .../Base/opencl/GPUReconstructionOCL.cxx | 4 + .../Base/opencl/GPUReconstructionOCL.h | 2 - .../opencl/GPUReconstructionOCLKernels.cxx | 4 +- GPU/GPUTracking/CMakeLists.txt | 12 +- GPU/GPUTracking/Definitions/GPUDef.h | 2 +- .../Definitions/GPUDefConstantsAndSettings.h | 4 +- .../Definitions/GPUDefParameters.template.h | 27 ++ ...Parameters.h => GPUDefParametersDefault.h} | 332 ++++++++++++++++-- .../GPUDefParametersLoad.template.inc | 56 +++ GPU/GPUTracking/Standalone/CMakeLists.txt | 7 +- .../cmake/GPUNoFastMathKernels.template.h | 5 + GPU/GPUTracking/kernels.cmake | 4 +- dependencies/FindO2GPU.cmake | 1 - 24 files 
changed, 488 insertions(+), 103 deletions(-) create mode 100644 GPU/GPUTracking/Definitions/GPUDefParameters.template.h rename GPU/GPUTracking/Definitions/{GPUDefGPUParameters.h => GPUDefParametersDefault.h} (69%) create mode 100644 GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index 78da104a0c029..d7e99f53d4ce8 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -58,7 +58,7 @@ #if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) #define GPUCA_NO_CONSTANT_MEMORY -#elif defined(__CUDACC__) || defined(__HIPCC__) +#elif (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(GPUCA_GPUCODE_HOSTONLY) #define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM #endif diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index e0b06f0a3ea55..532c270431d99 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -95,7 +95,7 @@ union GPUConstantMemCopyable { static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + sizeof(uint4) - 1); #endif } // namespace o2::gpu -#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) +#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM namespace o2::gpu @@ -104,7 +104,7 @@ namespace o2::gpu // Must be placed here, to avoid circular header dependency GPUdi() GPUconstantref() const GPUConstantMem* GPUProcessor::GetConstantMem() const { -#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) +#if 
defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) return &GPUCA_CONSMEM; #else return mConstantMem; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index a8a83fdbd9203..d5404618c32b1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -16,7 +16,7 @@ #include "GPUReconstructionIncludes.h" #include "GPUReconstructionThreading.h" #include "GPUChain.h" - +#include "GPUDefParameters.h" #include "GPUTPCClusterData.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" @@ -120,15 +120,27 @@ void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs -krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend() +template +gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu) { - return krnlProperties{1, 1}; + if (gpu == -1) { + gpu = IsGPU(); + } + const auto num = GetKernelNum(); + const auto* p = gpu ? mParDevice : mParCPU; + gpu_reconstruction_kernels::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]}; + if (ret.nThreads == 0) { + ret.nThreads = gpu ? 
mThreadCount : 1u; + } + if (ret.minBlocks == 0) { + ret.minBlocks = 1; + } + return ret; } #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ template void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args); \ - template krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend(); + template krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index fd999ec2304e1..099fed5afacf0 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -38,8 +38,6 @@ class GPUReconstructionCPUBackend : public GPUReconstructionProcessing void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs& args); template void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args); - template - gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); }; class GPUReconstructionCPU : public GPUReconstructionKernels @@ -55,10 +53,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels void runKernel(krnlSetup&& setup, Args&&... 
args); template - const gpu_reconstruction_kernels::krnlProperties getKernelProperties() - { - return getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument()); - } + gpu_reconstruction_kernels::krnlProperties getKernelProperties(int gpu = -1); virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false); int32_t GPUStuck() { return mGPUStuck; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index 0b1a501ebc094..b3f6c6ec817fd 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -38,21 +38,18 @@ #ifndef GPUCA_KRNL_CUSTOM #define GPUCA_KRNL_CUSTOM(...) #endif -#define GPUCA_KRNL_REG_EXTRREG(...) GPUCA_M_STRIP(__VA_ARGS__) -#define GPUCA_KRNL_CUSTOM_EXTRREG(MODE, ...) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__) -#define GPUCA_KRNL_NONE_EXTRREG(MODE, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_REG(MODE, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, MODE))(num) GPUCA_ATTRRES_XREG (MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_CUSTOM(MODE, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, MODE))(args) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_NONE(MODE, none, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_(MODE, ...) -#define GPUCA_ATTRRES_XNONE(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_XCUSTOM(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_XREG(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_REG(reg, num, ...) 
GPUCA_M_EXPAND(GPUCA_KRNL_REG)(num) GPUCA_ATTRRES_XREG (__VA_ARGS__) +#define GPUCA_ATTRRES_CUSTOM(custom, args, ...) GPUCA_M_EXPAND(GPUCA_KRNL_CUSTOM)(args) GPUCA_ATTRRES_XCUSTOM(__VA_ARGS__) +#define GPUCA_ATTRRES_NONE(none, ...) GPUCA_ATTRRES_XNONE(__VA_ARGS__) +#define GPUCA_ATTRRES_(...) +#define GPUCA_ATTRRES_XNONE(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__) +#define GPUCA_ATTRRES_XCUSTOM(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__) +#define GPUCA_ATTRRES_XREG(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__) +#define GPUCA_ATTRRES(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__) // GPU Kernel entry point #define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) \ - GPUg() void GPUCA_ATTRRES(, GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) + GPUg() void GPUCA_ATTRRES(GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) #ifdef GPUCA_KRNL_DEFONLY #define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__); @@ -79,12 +76,6 @@ } \ }; -#define GPUCA_KRNL_PROP(x_class, x_attributes) \ - template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend() { \ - gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \ - return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ - } - #endif // GPUCA_GPUCODE #define GPUCA_KRNL_LB(x_class, x_attributes, ...) 
GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index b8f3e3746c743..f3d52da8b5613 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -95,14 +95,10 @@ class GPUReconstructionKernels : public T template using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - virtual void runKernelImpl(const krnlSetupArgs& args) \ - { \ - T::template runKernelBackend(args); \ - } \ - virtual gpu_reconstruction_kernels::krnlProperties getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument) \ - { \ - return T::template getKernelPropertiesBackend(); \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ + virtual void runKernelImpl(const krnlSetupArgs& args) \ + { \ + T::template runKernelBackend(args); \ } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index bae95ac8d3f38..58df7f01823dc 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -12,11 +12,35 @@ /// \file GPUReconstructionProcessing.cxx /// \author David Rohr +#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS +#include "GPUDefParametersDefault.h" +#include "GPUDefParametersLoad.inc" + #include "GPUReconstructionProcessing.h" #include "GPUReconstructionThreading.h" using namespace o2::gpu; +GPUReconstructionProcessing::GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) +{ + if (mMaster == nullptr) { + mParCPU = new GPUDefParameters(o2::gpu::internal::GPUDefParametersLoad()); + mParDevice = new GPUDefParameters(); + } 
else { + GPUReconstructionProcessing* master = dynamic_cast(mMaster); + mParCPU = master->mParCPU; + mParDevice = master->mParDevice; + } +} + +GPUReconstructionProcessing::~GPUReconstructionProcessing() +{ + if (mMaster == nullptr) { + delete mParCPU; + delete mParDevice; + } +} + int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores) { int32_t nThreads = 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index b0466efceac24..f539c91b90a6e 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -25,6 +25,8 @@ namespace o2::gpu { +struct GPUDefParameters; + namespace gpu_reconstruction_kernels { struct deviceEvent { @@ -63,7 +65,7 @@ class threadContext class GPUReconstructionProcessing : public GPUReconstruction { public: - ~GPUReconstructionProcessing() override = default; + ~GPUReconstructionProcessing() override; // Threading int32_t getNKernelHostThreads(bool splitCores); @@ -101,7 +103,7 @@ class GPUReconstructionProcessing : public GPUReconstruction }; protected: - GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} + GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg); using deviceEvent = gpu_reconstruction_kernels::deviceEvent; static const std::vector mKernelNames; @@ -132,6 +134,9 @@ class GPUReconstructionProcessing : public GPUReconstruction template HighResTimer& getTimer(const char* name, int32_t num = -1); + GPUDefParameters* mParCPU = nullptr; + GPUDefParameters* mParDevice = nullptr; + private: uint32_t getNextTimerId(); timerMeta* getTimerById(uint32_t id, bool increment = true); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index f87d5c8189cdc..8790d7718f517 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -13,8 +13,13 @@ /// \author David Rohr #define GPUCA_GPUCODE_HOSTONLY -#include "GPUReconstructionCUDAIncludesHost.h" +#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS +#include "GPUReconstructionCUDADef.h" +#include "GPUDefParametersDefault.h" +#include "GPUDefParametersLoad.inc" + +#include "GPUReconstructionCUDAIncludesHost.h" #include #include "GPUReconstructionCUDA.h" @@ -51,11 +56,14 @@ GPUReconstructionCUDABackend::GPUReconstructionCUDABackend(const GPUSettingsDevi { if (mMaster == nullptr) { mInternals = new GPUReconstructionCUDAInternals; + *mParDevice = o2::gpu::internal::GPUDefParametersLoad(); } + mDeviceBackendSettings.deviceType = DeviceType::CUDA; } GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() { + Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA if (mMaster == nullptr) { delete mInternals; } @@ -69,7 +77,6 @@ int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, con GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) { - mDeviceBackendSettings.deviceType = DeviceType::CUDA; #ifndef __HIPCC__ // CUDA mRtcSrcExtension = ".cu"; mRtcBinExtension = ".fatbin"; @@ -78,11 +85,7 @@ GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg mRtcBinExtension = ".o"; #endif } - -GPUReconstructionCUDA::~GPUReconstructionCUDA() -{ - Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA -} +GPUReconstructionCUDA::~GPUReconstructionCUDA() {} GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionCUDA(cfg); } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index a98b14a873ca0..ac5920f769f25 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -44,8 +44,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase void runKernelBackend(const krnlSetupArgs& args); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); - template - gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); void getRTCKernelCalls(std::vector& kernels); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 4b3f8a767226c..2596d0e19ec48 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -83,16 +83,13 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); #else // ---------- COMPILE_MODE = onefile | rdc ---------- #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc #endif #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - GPUCA_KRNL_PROP(x_class, x_attributes) \ GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types) \ template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e724f0f2cbfcd..7310b8b6041a9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -12,7 +12,10 @@ /// \file GPUReconstructionOCL.cxx /// \author David Rohr +#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS #include "GPUReconstructionOCLIncludesHost.h" +#include "GPUDefParametersDefault.h" +#include "GPUDefParametersLoad.inc" 
#include @@ -36,6 +39,7 @@ GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDevice { if (mMaster == nullptr) { mInternals = new GPUReconstructionOCLInternals; + *mParDevice = o2::gpu::internal::GPUDefParametersLoad(); } mDeviceBackendSettings.deviceType = DeviceType::OCL; } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 29951cd43f167..16ef9b5e87fe8 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -58,8 +58,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t AddKernel(); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); - template - gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); GPUReconstructionOCLInternals* mInternals; float mOclVersion; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index fff69038c056f..f71336ac35e0e 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -91,8 +91,6 @@ int32_t GPUReconstructionOCLBackend::AddKernels() return 0; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - GPUCA_KRNL_PROP(x_class, x_attributes) \ - template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 39218e9f94527..33715909e810c 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -129,7 +129,7 @@ set(HDRS_INSTALL 
DataTypes/GPUTriggerOutputs.h Debug/GPUROOTDump.h Definitions/GPUDefConstantsAndSettings.h - Definitions/GPUDefGPUParameters.h + Definitions/GPUDefParametersDefault.h Definitions/GPUDef.h Definitions/GPUDefMacros.h Definitions/GPULogging.h @@ -247,10 +247,18 @@ file(GENERATE OUTPUT include_gpu_onthefly/GPUNoFastMathKernels.h INPUT cmake/GPUNoFastMathKernels.template.h ) +file(GENERATE + OUTPUT include_gpu_onthefly/GPUDefParameters.h + INPUT Definitions/GPUDefParameters.template.h +) +file(GENERATE + OUTPUT include_gpu_onthefly/GPUDefParametersLoad.inc + INPUT Definitions/GPUDefParametersLoad.template.inc +) if(NOT ALIGPU_BUILD_TYPE STREQUAL "O2") include_directories(${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) endif() -set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h) +set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h) include(kernels.cmake) # Optional sources depending on optional dependencies diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index f01e3e6d38332..404f35f971c94 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ b/GPU/GPUTracking/Definitions/GPUDef.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" #include "GPUDefConstantsAndSettings.h" -#include "GPUDefGPUParameters.h" +#include "GPUDefParametersDefault.h" #include "GPUCommonRtypes.h" // Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects) diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index f18390629f2bc..2d7aca8d71b92 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -13,7 +13,7 @@ /// \author David Rohr // This files contains compile-time 
constants affecting the GPU algorithms / reconstruction results. -// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefGPUParameters.h +// Architecture-dependent compile-time constants affecting the performance without changing the results are stored in GPUDefParametersDefault.h #ifndef GPUDEFCONSTANTSANDSETTINGS_H #define GPUDEFCONSTANTSANDSETTINGS_H @@ -66,7 +66,7 @@ #endif #endif -//#define GPUCA_MERGER_BY_MC_LABEL // Use MC labels for TPC track merging - for performance studies +//#define GPUCA_MERGER_BY_MC_LABEL // Use MC labels for TPC track merging - for performance studies // TODO: Cleanup unneeded options //#define GPUCA_FULL_CLUSTERDATA // Store all cluster information in the cluster data, also those not needed for tracking. //#define GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME // Propagate Pad, Row, Time cluster information to GM //#define GPUCA_GM_USE_FULL_FIELD // Use offline magnetic field during GMPropagator prolongation diff --git a/GPU/GPUTracking/Definitions/GPUDefParameters.template.h b/GPU/GPUTracking/Definitions/GPUDefParameters.template.h new file mode 100644 index 0000000000000..731cb76b89193 --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUDefParameters.template.h @@ -0,0 +1,27 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction.
+ +/// \file GPUDefParameters.h +/// \author David Rohr + +#ifndef GPUDEFPARAMETERS_H +#define GPUDEFPARAMETERS_H + +namespace o2::gpu +{ +struct GPUDefParameters { // clang-format off + int32_t par_LB_maxThreads[$>] = {}; + int32_t par_LB_minBlocks[$>] = {}; + int32_t par_LB_forceBlocks[$>] = {}; +}; // clang-format on +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h similarity index 69% rename from GPU/GPUTracking/Definitions/GPUDefGPUParameters.h rename to GPU/GPUTracking/Definitions/GPUDefParametersDefault.h index 910907368e891..7051fff2f177d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUDefGPUParameters.h +/// \file GPUDefParametersDefault.h /// \author David Rohr // This files contains compile-time constants affecting the GPU performance. @@ -17,14 +17,10 @@ // This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc. 
// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h -#ifndef GPUDEFGPUPARAMETERS_H -#define GPUDEFGPUPARAMETERS_H +#ifndef GPUDEFPARAMETERSDEFAULT_H +#define GPUDEFPARAMETERSDEFAULT_H // clang-format off -#ifndef GPUDEF_H -#error Please include GPUDef.h -#endif - #include "GPUCommonDef.h" #include "GPUDefMacros.h" @@ -282,7 +278,7 @@ #endif // GPUCA_GPUCODE #ifdef GPUCA_GPUCODE - // Default settings, if not already set for selected GPU type + // Default settings for GPU, if not already set for selected GPU type #ifndef GPUCA_THREAD_COUNT #define GPUCA_THREAD_COUNT 256 #endif @@ -334,10 +330,10 @@ #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 256 #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters #define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 256 #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 256 #endif #ifndef GPUCA_LB_GPUTPCCFDecodeZS @@ -487,10 +483,15 @@ #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256 #endif + #ifndef GPUCA_LB_GPUMemClean16 + #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT, 1 + #endif + #ifndef GPUCA_LB_GPUitoa + #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT, 1 + #endif #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) #else - // The following defaults are needed to compile the host code - #define GPUCA_GET_THREAD_COUNT(...) 1 + #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. #endif #define GPUCA_GET_WARP_COUNT(...) 
(GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) @@ -523,33 +524,33 @@ #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER #if defined(__CUDACC__) || defined(__HIPCC__) -#define GPUCA_SPECIALIZE_THRUST_SORTS + #define GPUCA_SPECIALIZE_THRUST_SORTS #endif #ifndef GPUCA_NEIGHBORSFINDER_REGS -#define GPUCA_NEIGHBORSFINDER_REGS NONE, 0 + #define GPUCA_NEIGHBORSFINDER_REGS NONE, 0 #endif #ifdef GPUCA_GPUCODE #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 #endif #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 #endif #ifndef GPUCA_ALTERNATE_BORDER_SORT - #define GPUCA_ALTERNATE_BORDER_SORT 0 + #define GPUCA_ALTERNATE_BORDER_SORT 0 #endif #ifndef GPUCA_SORT_BEFORE_FIT - #define GPUCA_SORT_BEFORE_FIT 0 + #define GPUCA_SORT_BEFORE_FIT 0 #endif #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 #endif #ifndef GPUCA_COMP_GATHER_KERNEL - #define GPUCA_COMP_GATHER_KERNEL 0 + #define GPUCA_COMP_GATHER_KERNEL 0 #endif #ifndef GPUCA_COMP_GATHER_MODE - #define GPUCA_COMP_GATHER_MODE 2 + #define GPUCA_COMP_GATHER_MODE 2 #endif #else #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 @@ -562,20 +563,20 @@ #define GPUCA_COMP_GATHER_MODE 0 #endif #ifndef GPUCA_DEDX_STORAGE_TYPE -#define GPUCA_DEDX_STORAGE_TYPE float + #define GPUCA_DEDX_STORAGE_TYPE float #endif #ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float #endif #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) #define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) #ifndef GPUCA_WARP_SIZE -#ifdef 
GPUCA_GPUCODE -#define GPUCA_WARP_SIZE 32 -#else -#define GPUCA_WARP_SIZE 1 -#endif + #ifdef GPUCA_GPUCODE + #define GPUCA_WARP_SIZE 32 + #else + #define GPUCA_WARP_SIZE 1 + #endif #endif #define GPUCA_MAX_THREADS 1024 @@ -602,10 +603,10 @@ // #define GPUCA_KERNEL_DEBUGGER_OUTPUT -// Some assertions to make sure out parameters are not invalid -static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); -static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); -#ifdef GPUCA_GPUCODE +// Some assertions to make sure the parameters are not invalid +#if defined(GPUCA_GPUCODE) + static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); + static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); #endif @@ -621,5 +622,270 @@ static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid #define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) #define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT +#ifdef GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS + // Invalid default values, must not be used, but needed for now to make the GPUDefParametersLoad() happy // TODO: cleanup + #ifndef GPUCA_LB_GPUTPCCreateTrackingData + #define GPUCA_LB_GPUTPCCreateTrackingData 0 + #endif + #ifndef GPUCA_LB_GPUTPCTrackletConstructor + #define GPUCA_LB_GPUTPCTrackletConstructor 0 + #endif + #ifndef GPUCA_LB_GPUTPCTrackletSelector + #define GPUCA_LB_GPUTPCTrackletSelector 0 + #endif + #ifndef GPUCA_LB_GPUTPCNeighboursFinder + #define GPUCA_LB_GPUTPCNeighboursFinder 0 + #endif + #ifndef GPUCA_LB_GPUTPCNeighboursCleaner + #define GPUCA_LB_GPUTPCNeighboursCleaner 0 + #endif + #ifndef
GPUCA_LB_GPUTPCExtrapolationTracking + #define GPUCA_LB_GPUTPCExtrapolationTracking 0 + #endif + #ifndef GPUCA_LB_GPUTRDTrackerKernels_gpuVersion + #define GPUCA_LB_GPUTRDTrackerKernels_gpuVersion 0 + #endif + #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fill + #define GPUCA_LB_GPUTPCCreateOccupancyMap_fill 0 + #endif + #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fold + #define GPUCA_LB_GPUTPCCreateOccupancyMap_fold 0 + #endif + #ifndef GPUCA_LB_GPUTRDTrackerKernels_o2Version + #define GPUCA_LB_GPUTRDTrackerKernels_o2Version 0 + #endif + #ifndef GPUCA_LB_GPUTPCConvertKernel + #define GPUCA_LB_GPUTPCConvertKernel 0 + #endif + #ifndef GPUCA_LB_GPUTPCCompressionKernels_step0attached + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 0 + #endif + #ifndef GPUCA_LB_GPUTPCCompressionKernels_step1unattached + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step0attached + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step1unattached + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow + #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters + #define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters + #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFDecodeZS + #define GPUCA_LB_GPUTPCCFDecodeZS 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFDecodeZSLink + #define GPUCA_LB_GPUTPCCFDecodeZSLink 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFDecodeZSDenseLink + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFGather + #define GPUCA_LB_GPUTPCCFGather 0 + #endif + #ifndef GPUCA_LB_COMPRESSION_GATHER + 
#define GPUCA_LB_COMPRESSION_GATHER 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerTrackFit + #define GPUCA_LB_GPUTPCGMMergerTrackFit 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerFollowLoopers + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerSectorRefit + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerUnpackResetIds + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerUnpackGlobal + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step0 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step1 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step2 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step3 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step4 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerClearLinks + #define GPUCA_LB_GPUTPCGMMergerClearLinks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeCE + #define GPUCA_LB_GPUTPCGMMergerMergeCE 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerCollect + #define GPUCA_LB_GPUTPCGMMergerCollect 0 + #endif + #ifndef 
GPUCA_LB_GPUTPCGMMergerSortTracksPrepare + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0 + #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step1 + #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step2 + #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 + #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 + #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 + #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMO2Output_prepare + #define GPUCA_LB_GPUTPCGMO2Output_prepare 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMO2Output_output + #define GPUCA_LB_GPUTPCGMO2Output_output 0 + #endif + #ifndef GPUCA_LB_GPUITSFitterKernels + #define GPUCA_LB_GPUITSFitterKernels 0 + #endif + #ifndef GPUCA_LB_GPUTPCStartHitsFinder + #define GPUCA_LB_GPUTPCStartHitsFinder 0 + #endif + #ifndef GPUCA_LB_GPUTPCStartHitsSorter + #define GPUCA_LB_GPUTPCStartHitsSorter 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFCheckPadBaseline + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 0 + #endif + #ifndef 
GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFPeakFinder + #define GPUCA_LB_GPUTPCCFPeakFinder 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFNoiseSuppression + #define GPUCA_LB_GPUTPCCFNoiseSuppression 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFDeconvolution + #define GPUCA_LB_GPUTPCCFDeconvolution 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFClusterizer + #define GPUCA_LB_GPUTPCCFClusterizer 0 + #endif + #ifndef GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels 0 + #endif + #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU + #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 0 + #endif + #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov + #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 0 + #endif + #ifndef GPUCA_LB_GPUMemClean16 + #define GPUCA_LB_GPUMemClean16 0 + #endif + #ifndef GPUCA_LB_GPUitoa + #define GPUCA_LB_GPUitoa 0 + #endif + #ifndef GPUCA_LB_GPUTPCExtrapolationTrackingCopyNumbers + #define GPUCA_LB_GPUTPCExtrapolationTrackingCopyNumbers 0 + #endif + #ifndef GPUCA_LB_GPUTPCSectorDebugSortKernels_hitData + #define GPUCA_LB_GPUTPCSectorDebugSortKernels_hitData 0 + #endif + #ifndef GPUCA_LB_GPUTPCSectorDebugSortKernels_startHits + #define GPUCA_LB_GPUTPCSectorDebugSortKernels_startHits 0 + #endif + #ifndef GPUCA_LB_GPUTPCSectorDebugSortKernels_sectorTracks + #define GPUCA_LB_GPUTPCSectorDebugSortKernels_sectorTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_clearIds + #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_clearIds 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_sectorTracks + #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_sectorTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks1 + #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks2 + #define 
GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_borderTracks + #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_borderTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerUnpackSaveNumber + #define GPUCA_LB_GPUTPCGMMergerUnpackSaveNumber 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step1 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_variant + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_variant 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerSortTracks + #define GPUCA_LB_GPUTPCGMMergerSortTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerSortTracksQPt + #define GPUCA_LB_GPUTPCGMMergerSortTracksQPt 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMO2Output_sort + #define GPUCA_LB_GPUTPCGMO2Output_sort 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMO2Output_mc + #define GPUCA_LB_GPUTPCGMO2Output_mc 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFMCLabelFlattener_setRowOffsets + #define GPUCA_LB_GPUTPCCFMCLabelFlattener_setRowOffsets 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFMCLabelFlattener_flatten + #define GPUCA_LB_GPUTPCCFMCLabelFlattener_flatten 0 + #endif +#endif // GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS + // clang-format on -#endif +#endif // GPUDEFPARAMETERSDEFAULT_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc new file mode 100644 index 0000000000000..953750b6f925b --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -0,0 +1,56 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDefParametersLoad.inc +/// \author David Rohr + +#include "GPUDefParameters.h" +#include "GPUDefMacros.h" +#include +#include + +namespace o2::gpu::internal +{ + +static GPUDefParameters GPUDefParametersLoad() +{ + return GPUDefParameters{ + // clang-format off + {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_LB_>,APPEND,)>,$>}, + {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0))>,$>}, + {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>} + // clang-format on + }; +} + +#define GPUCA_EXPORT_KERNEL(name) \ + if (par.par_LB_maxThreads[i] > 0) { \ + o << "#define " GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ + if (par.par_LB_minBlocks[i] > 0) { \ + o << ", " << par.par_LB_minBlocks[i]; \ + } \ + if (par.par_LB_forceBlocks[i] > 0) { \ + o << ", " << par.par_LB_forceBlocks[i]; \ + } \ + o << "\n"; \ + } \ + i++; + +static std::string GPUDefParametersExport(const GPUDefParameters& par) +{ + std::stringstream o; // clang-format off + int32_t i = 0; + $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_EXPORT_KERNEL(>,APPEND,)>, + > + return o.str(); // clang-format on +} + +} // namespace o2::gpu::internal diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index ed4fc5c9f7e2d..dfc8e8db3bc7a 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -62,8 +62,11 @@ if (GPUCA_BUILD_DEBUG_SANITIZE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") endif() endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings 
-Wno-vla-cxx-extension") -set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") +string(APPEND CMAKE_CXX_FLAGS " -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") +string(APPEND CMAKE_SHARED_LINKER_FLAGS " -rdynamic -Wl,--no-undefined") +if(CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") + string(APPEND CMAKE_CXX_FLAGS " -Wno-vla-cxx-extension") +endif() # Find mandatory packages find_package(TBB REQUIRED) diff --git a/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h b/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h index dac93277d5ec9..499672bf00b50 100644 --- a/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h +++ b/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h @@ -12,6 +12,9 @@ /// \file GPUNoFastMathKernels.h /// \author David Rohr +#ifndef GPUNOFASTMATHKERNELS_H +#define GPUNOFASTMATHKERNELS_H + #include #include @@ -21,3 +24,5 @@ namespace o2::gpu::internal static const std::unordered_set noFastMathKernels = {$>,APPEND,">,PREPEND,">,$ >}; // clang-format on } // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index ad348a84264f0..6d8b49116accc 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -37,8 +37,8 @@ o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRAC o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" "NO_REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" "NO_REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO void* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUitoa" 
"GPUGeneralKernels" NO int32_t* ptr "uint64_t" size) o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO int32_t n) o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB) o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index bbbb420354fae..95db55041184f 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -82,7 +82,6 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$") message(FATAL_ERROR "Invalid setting ${GPUCA_DETERMINISTIC_MODE} for GPUCA_DETERMINISTIC_MODE") endif() set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}}) - message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}") endif() if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") set(GPUCA_CXX_DENORMALS_FLAGS "") From a0e63ef302252475223531d74e7871dd942c951d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 15:52:06 +0100 Subject: [PATCH 0155/1764] GPU TPC: Drop unused TrackletConstructor_allSectors kernel --- .../Definitions/GPUDefParametersDefault.h | 2 - .../Global/GPUChainTrackingSectorTracker.cxx | 2 +- .../GPUTPCTrackletConstructor.cxx | 75 +------------------ .../SectorTracker/GPUTPCTrackletConstructor.h | 11 +-- GPU/GPUTracking/kernels.cmake | 3 +- 5 files changed, 4 insertions(+), 89 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h index 7051fff2f177d..1193731acd9cf 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h @@ -515,8 +515,6 @@ #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN #define 
GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCTrackletConstructor_singleSector GPUCA_LB_GPUTPCTrackletConstructor -#define GPUCA_LB_GPUTPCTrackletConstructor_allSectors GPUCA_LB_GPUTPCTrackletConstructor #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index e161f74a31032..3e7447892307a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -105,7 +105,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); // Initialize Startup Constants - processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties().nThreads; + processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties().nThreads; processorsShadow()->tpcTrackers[iSector].SetGPUTextureBase(mRec->DeviceMemoryBase()); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 5a7df0ba8b874..71df683eee1dc 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -476,7 +476,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() GPUTPCT } template <> -GPUdii() void 
GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) { if (get_local_id(0) == 0) { sMem.mNStartHits = *tracker.NStartHits(); @@ -491,79 +491,6 @@ GPUdii() void GPUTPCTrackletConstructor::Thread -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker0) -{ - GPUconstantref() GPUTPCTracker* GPUrestrict() pTracker = &tracker0; -#ifdef GPUCA_GPUCODE - int32_t mySector = get_group_id(0) % GPUCA_NSECTORS; - int32_t currentSector = -1; - - if (get_local_id(0) == 0) { - sMem.mNextStartHitFirstRun = 1; - } - GPUCA_UNROLL(, U()) - for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { - GPUconstantref() GPUTPCTracker& GPUrestrict() tracker = pTracker[mySector]; - - GPUTPCThreadMemory rMem; - - while ((rMem.mISH = FetchTracklet(tracker, sMem)) != -2) { - if (rMem.mISH >= 0 && get_local_id(0) < GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor)) { - rMem.mISH += get_local_id(0); - } else { - rMem.mISH = -1; - } - - if (mySector != currentSector) { - if (get_local_id(0) == 0) { - sMem.mNStartHits = *tracker.NStartHits(); - } - CA_SHARED_CACHE(&sMem.mRows[0], tracker.TrackingDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); - GPUbarrier(); - currentSector = mySector; - } - - if (rMem.mISH >= 0 && rMem.mISH < sMem.mNStartHits) { - rMem.mGo = true; - DoTracklet(tracker, sMem, rMem); - } - } - if (++mySector >= GPUCA_NSECTORS) { - mySector = 0; - } - } -#else - for (int32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { - Thread(nBlocks, nThreads, iBlock, iThread, sMem, pTracker[iSector]); - } -#endif -} - -#ifdef 
GPUCA_GPUCODE - -GPUd() int32_t GPUTPCTrackletConstructor::FetchTracklet(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUSharedMemory& sMem) -{ - const uint32_t nStartHit = *tracker.NStartHits(); - GPUbarrier(); - if (get_local_id(0) == 0) { - int32_t firstStartHit = -2; - if (sMem.mNextStartHitFirstRun == 1) { - firstStartHit = (get_group_id(0) - tracker.ISector()) / GPUCA_NSECTORS * GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor); - sMem.mNextStartHitFirstRun = 0; - } else { - if (tracker.GPUParameters()->nextStartHit < nStartHit) { - firstStartHit = CAMath::AtomicAdd(&tracker.GPUParameters()->nextStartHit, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor)); - } - } - sMem.mNextStartHitFirst = firstStartHit < (int32_t)nStartHit ? firstStartHit : -2; - } - GPUbarrier(); - return (sMem.mNextStartHitFirst); -} - -#endif // GPUCA_GPUCODE - template <> // FIXME: GPUgeneric() needed to make the clang spirv output link correctly GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUTPCExtrapolationTracking::GPUSharedMemory& sMem, GPUTPCTrackParam& GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index 0f8314ee0fad4..af87d0276f1c7 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -28,14 +28,9 @@ namespace o2::gpu */ class GPUTPCTracker; -class GPUTPCTrackletConstructor +class GPUTPCTrackletConstructor : public GPUKernelTemplate { public: - enum K { - singleSector = 0, - allSectors = 1 - }; - class GPUTPCThreadMemory { friend class GPUTPCTrackletConstructor; //! 
friend class @@ -89,10 +84,6 @@ class GPUTPCTrackletConstructor GPUd() static void DoTracklet(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() GPUTPCTrackletConstructor::GPUSharedMemory& sMem, GPUTPCThreadMemory& rMem); -#ifdef GPUCA_GPUCODE - GPUd() static int32_t FetchTracklet(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& sMem); -#endif // GPUCA_GPUCODE - template GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 6d8b49116accc..994f10a516b10 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -34,8 +34,7 @@ o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRAC o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletConstructor" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO void* ptr "uint64_t" size) o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO int32_t* ptr "uint64_t" size) From a5caa277d89ede13c8a8161438a9c1eda1cb14a2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 17:00:24 +0100 Subject: [PATCH 0156/1764] GPU: Count kernel number in Cmake, and pass to MACROS, to simplify the preprocessor logic --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 2 +- .../Base/GPUReconstructionProcessing.cxx | 44 ++++++------------- .../Base/GPUReconstructionProcessing.h | 2 +- 
.../Base/cuda/GPUReconstructionCUDAKernels.cu | 6 +-- .../opencl/GPUReconstructionOCLKernels.cxx | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 4 +- 7 files changed, 24 insertions(+), 38 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index d5404618c32b1..d714c6833d18d 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -138,7 +138,7 @@ gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProper return ret; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ template void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args); \ template krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); #include "GPUReconstructionKernelList.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 099fed5afacf0..48d6ddf17959a 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -72,7 +72,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ { \ if (cpuFallback) { \ diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 58df7f01823dc..95a47dec946e6 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -143,38 +143,22 @@ std::unique_ptr GPUReconstructionProc gpu_reconstruction_kernels::threadContext::threadContext() = default; gpu_reconstruction_kernels::threadContext::~threadContext() = default; -template -uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t k) -{ - static int32_t num = k; - if (num < 0) { - throw 
std::runtime_error("Internal Error - Kernel Number not Set"); - } - return num; -} - -namespace o2::gpu::internal -{ -static std::vector initKernelNames() -{ - std::vector retVal; -#define GPUCA_KRNL(x_class, ...) \ - GPUReconstructionProcessing::GetKernelNum(retVal.size()); \ - retVal.emplace_back(GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class))); +const std::vector GPUReconstructionProcessing::mKernelNames = { +#define GPUCA_KRNL(x_class, ...) GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)), #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL - return retVal; -} -} // namespace o2::gpu::internal - -const std::vector GPUReconstructionProcessing::mKernelNames = o2::gpu::internal::initKernelNames(); - -#define GPUCA_KRNL(x_class, ...) \ - template uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t); \ - template <> \ - const char* GPUReconstructionProcessing::GetKernelName() \ - { \ - return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ +}; + +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ + template <> \ + uint32_t GPUReconstructionProcessing::GetKernelNum() \ + { \ + return x_num; \ + } \ + template <> \ + const char* GPUReconstructionProcessing::GetKernelName() \ + { \ + return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index f539c91b90a6e..2428027118c0a 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -80,7 +80,7 @@ class GPUReconstructionProcessing : public GPUReconstruction static const char* GetKernelName(); const std::string& GetKernelName(int32_t i) const { return mKernelNames[i]; } template - static uint32_t GetKernelNum(int32_t k = -1); + static uint32_t GetKernelNum(); // Public queries for timers auto& getRecoStepTimer(RecoStep step) { return 
mTimersRecoSteps[getRecoStepNum(step)]; } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 2596d0e19ec48..ac79dd7576e48 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -83,14 +83,14 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); #else // ---------- COMPILE_MODE = onefile | rdc ---------- #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc #endif -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types) \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ + GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \ template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); #ifndef __HIPCC__ // CUDA version diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index f71336ac35e0e..cca634fba65fc 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -91,6 +91,6 @@ int32_t GPUReconstructionOCLBackend::AddKernels() return 0; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) 
template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 99699cc72e940..3c1ad9658566b 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -64,7 +64,9 @@ function(o2_gpu_add_kernel kernel_name kernel_files) endif() set(TMP_PRE "") set(TMP_POST "") - set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_extra}), (${OPT1}), (${OPT2}), (${OPT3}))\n") + get_property(LIST_KERNELS TARGET O2_GPU_KERNELS PROPERTY O2_GPU_KERNELS) + list(LENGTH LIST_KERNELS KERNEL_COUNT) + set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_extra}), (${OPT1}), (${OPT2}), (${OPT3}), ${KERNEL_COUNT})\n") separate_arguments(kernel_files NATIVE_COMMAND ${kernel_files}) list(GET kernel_files 0 TMP_KERNEL_CLASS_FILE) if (TMP_KERNEL_CLASS_FILE STREQUAL "=") From 90f376389dafe933649987a4cdeaf3e03dcc01bf Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 17:02:08 +0100 Subject: [PATCH 0157/1764] GPU: Rewrite virtual kernel call to a single virtual function, should enable further simplifications in the future --- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 6 ++++-- GPU/GPUTracking/Base/GPUReconstructionKernels.h | 16 +++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 48d6ddf17959a..163b00c804d7f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -75,10 +75,12 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ { \ + krnlSetupArgs args(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward)); \ + const uint32_t num = GetKernelNum(); \ 
if (cpuFallback) { \ - GPUReconstructionCPU::runKernelImpl(krnlSetupArgs(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \ + GPUReconstructionCPU::runKernelImpl(num, &args); \ } else { \ - runKernelImpl(krnlSetupArgs(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \ + runKernelImpl(num, &args); \ } \ } #include "GPUReconstructionKernelList.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index f3d52da8b5613..7f500d471de1f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -95,13 +95,19 @@ class GPUReconstructionKernels : public T template using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - virtual void runKernelImpl(const krnlSetupArgs& args) \ - { \ - T::template runKernelBackend(args); \ - } + virtual void runKernelImpl(const int num, const void* args) + { + switch (num) { // clang-format off +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ + case x_num: { \ + const auto& args2 = *(const krnlSetupArgs*)args; \ + T::template runKernelBackend(args2); \ + break; \ + } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL + } // clang-format on + } }; } // namespace o2::gpu From 3f679a66429822f285a152c16ee9959556bdba84 Mon Sep 17 00:00:00 2001 From: Andreas Molander Date: Thu, 20 Mar 2025 10:43:35 +0200 Subject: [PATCH 0158/1764] FIT: Geometry alignment macros - Make misalignment macros usable for creating exact misalignments - Add macro for reading misalignments from CCDB - Add a couple of plots to hit reading marcos (needed to verify misalignments) - Minor change to FV0 geometry to provoding symbolic names of alignable volumes --- Detectors/FIT/FT0/macros/FT0Misaligner.C | 44 ++++++++++------ .../FIT/FV0/base/include/FV0Base/Geometry.h | 12 +++++ 
Detectors/FIT/FV0/macros/FV0Misaligner.C | 46 ++++++++++++----- Detectors/FIT/FV0/simulation/src/Detector.cxx | 19 +++---- Detectors/FIT/macros/CMakeLists.txt | 4 ++ Detectors/FIT/macros/readAlignParam.C | 51 +++++++++++++++++++ Detectors/FIT/macros/readFT0hits.C | 29 +++++++++-- Detectors/FIT/macros/readFV0hits.C | 37 ++++++++++++-- 8 files changed, 196 insertions(+), 46 deletions(-) create mode 100644 Detectors/FIT/macros/readAlignParam.C diff --git a/Detectors/FIT/FT0/macros/FT0Misaligner.C b/Detectors/FIT/FT0/macros/FT0Misaligner.C index 7585411066934..9621d1a079bc9 100644 --- a/Detectors/FIT/FT0/macros/FT0Misaligner.C +++ b/Detectors/FIT/FT0/macros/FT0Misaligner.C @@ -1,15 +1,30 @@ +// Copyright 2021-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file FT0Misaligner.C +/// \brief ROOT macro for creating an FT0 geometry alignment object. 
Based on ITSMisaligner.C +/// +/// \author Andreas Molander andreas.molander@cern.ch, Alla Maevskaya + #if !defined(__CLING__) || defined(__ROOTCLING__) -//#define ENABLE_UPGRADES + +#include "CCDB/CcdbApi.h" #include "DetectorsCommonDataFormats/DetID.h" #include "DetectorsCommonDataFormats/DetectorNameConf.h" #include "DetectorsCommonDataFormats/AlignParam.h" -#include "DetectorsBase/GeometryManager.h" -#include "CCDB/CcdbApi.h" -#include "FT0Base/Geometry.h" -#include + #include #include #include + #endif using AlgPar = std::array; @@ -23,19 +38,15 @@ void FT0Misaligner(const std::string& ccdbHost = "http://ccdb-test.cern.ch:8080" const std::string& fileName = "FT0Alignment.root") { std::vector params; - o2::base::GeometryManager::loadGeometry("", false); - // auto geom = o2::ft0::Geometry::Instance(); AlgPar pars; bool glo = true; o2::detectors::DetID detFT0("FT0"); - // FT0 detector - //set A side std::string symNameA = "FT0A"; pars = generateMisalignment(xA, yA, zA, psiA, thetaA, phiA); params.emplace_back(symNameA.c_str(), -1, pars[0], pars[1], pars[2], pars[3], pars[4], pars[5], glo); - //set C side + std::string symNameC = "FT0C"; pars = generateMisalignment(xC, yC, zC, psiC, thetaC, phiC); params.emplace_back(symNameC.c_str(), -1, pars[0], pars[1], pars[2], pars[3], pars[4], pars[5], glo); @@ -57,14 +68,15 @@ void FT0Misaligner(const std::string& ccdbHost = "http://ccdb-test.cern.ch:8080" algFile.Close(); } } + AlgPar generateMisalignment(double x, double y, double z, double psi, double theta, double phi) { AlgPar pars; - pars[0] = gRandom->Gaus(0, x); - pars[1] = gRandom->Gaus(0, y); - pars[2] = gRandom->Gaus(0, z); - pars[3] = gRandom->Gaus(0, psi); - pars[4] = gRandom->Gaus(0, theta); - pars[5] = gRandom->Gaus(0, phi); + pars[0] = x; + pars[1] = y; + pars[2] = z; + pars[3] = psi; + pars[4] = theta; + pars[5] = phi; return std::move(pars); } diff --git a/Detectors/FIT/FV0/base/include/FV0Base/Geometry.h b/Detectors/FIT/FV0/base/include/FV0Base/Geometry.h 
index 3b50be7441ec2..ec87c07c57c45 100644 --- a/Detectors/FIT/FV0/base/include/FV0Base/Geometry.h +++ b/Detectors/FIT/FV0/base/include/FV0Base/Geometry.h @@ -133,6 +133,16 @@ class Geometry return o2::base::GeometryManager::getPNEntry(getDetID(), index); } + static std::string getDetectorRightSymName() + { + return sDetectorRightName + "_0"; + } + + static std::string getDetectorLeftSymName() + { + return sDetectorLeftName + "_1"; + } + /// Get the density of the PMTs. static constexpr float getPmtDensity() { @@ -143,6 +153,8 @@ class Geometry explicit Geometry(EGeoType initType); inline static const std::string sDetectorName = "FV0"; + inline static const std::string sDetectorRightName = sDetectorName + "RIGHT"; + inline static const std::string sDetectorLeftName = sDetectorName + "LEFT"; // General geometry constants static constexpr float sEpsilon = 0.01; ///< Used to make one spatial dimension infinitesimally larger than other diff --git a/Detectors/FIT/FV0/macros/FV0Misaligner.C b/Detectors/FIT/FV0/macros/FV0Misaligner.C index 500bdaf565965..88f7a0b82b8b3 100644 --- a/Detectors/FIT/FV0/macros/FV0Misaligner.C +++ b/Detectors/FIT/FV0/macros/FV0Misaligner.C @@ -1,13 +1,32 @@ +// Copyright 2021-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file FV0Misaligner.C +/// \brief ROOT macro for creating an FV0 geometry alignment object. The alignment object will align both +/// detector halves in the same way. 
Based on ITSMisaligner.C +/// +/// \author Andreas Molander andreas.molander@cern.ch, Alla Maevskaya + #if !defined(__CLING__) || defined(__ROOTCLING__) + +#include "CCDB/CcdbApi.h" #include "DetectorsCommonDataFormats/DetID.h" #include "DetectorsCommonDataFormats/DetectorNameConf.h" #include "DetectorsCommonDataFormats/AlignParam.h" -#include "DetectorsBase/GeometryManager.h" -#include "CCDB/CcdbApi.h" -#include +#include "FV0Base/Geometry.h" + #include #include #include + #endif using AlgPar = std::array; @@ -20,16 +39,14 @@ void FV0Misaligner(const std::string& ccdbHost = "http://ccdb-test.cern.ch:8080" const std::string& fileName = "FV0Alignment.root") { std::vector params; - o2::base::GeometryManager::loadGeometry("", false); AlgPar pars; bool glo = true; o2::detectors::DetID detFV0("FV0"); - // FV0 detector - for (int ihalf = 1; ihalf < 3; ihalf++) { - std::string symName = Form("FV0half_%i", ihalf); - pars = generateMisalignment(x, y, z, psi, theta, phi); + pars = generateMisalignment(x, y, z, psi, theta, phi); + + for (auto& symName : {o2::fv0::Geometry::getDetectorRightSymName(), o2::fv0::Geometry::getDetectorLeftSymName()}) { params.emplace_back(symName.c_str(), -1, pars[0], pars[1], pars[2], pars[3], pars[4], pars[5], glo); } @@ -50,14 +67,15 @@ void FV0Misaligner(const std::string& ccdbHost = "http://ccdb-test.cern.ch:8080" algFile.Close(); } } + AlgPar generateMisalignment(double x, double y, double z, double psi, double theta, double phi) { AlgPar pars; - pars[0] = gRandom->Gaus(0, x); - pars[1] = gRandom->Gaus(0, y); - pars[2] = gRandom->Gaus(0, z); - pars[3] = gRandom->Gaus(0, psi); - pars[4] = gRandom->Gaus(0, theta); - pars[5] = gRandom->Gaus(0, phi); + pars[0] = x; + pars[1] = y; + pars[2] = z; + pars[3] = psi; + pars[4] = theta; + pars[5] = phi; return std::move(pars); } diff --git a/Detectors/FIT/FV0/simulation/src/Detector.cxx b/Detectors/FIT/FV0/simulation/src/Detector.cxx index 8cf1f5530e93d..07eb9053bf3b8 100644 --- 
a/Detectors/FIT/FV0/simulation/src/Detector.cxx +++ b/Detectors/FIT/FV0/simulation/src/Detector.cxx @@ -280,6 +280,7 @@ void Detector::ConstructGeometry() // mGeometry->enableComponent(Geometry::eAluminiumContainer, false); mGeometry->buildGeometry(); } + void Detector::addAlignableVolumes() const { // @@ -292,19 +293,19 @@ void Detector::addAlignableVolumes() const LOG(info) << "FV0: Add alignable volumes"; if (!gGeoManager) { - LOG(fatal) << "TGeoManager doesn't exist !"; + LOG(fatal) << "TGeoManager doesn't exist!"; return; } - TString volPath, symName; - for (auto& half : {"RIGHT_0", "LEFT_1"}) { - volPath = Form("/cave_1/barrel_1/FV0_1/FV0%s", half); - symName = Form("FV0%s", half); - LOG(info) << "FV0: Add alignable volume: " << symName << ": " << volPath; - if (!gGeoManager->SetAlignableEntry(symName.Data(), volPath.Data())) { - LOG(fatal) << "FV0: Unable to set alignable entry! " << symName << ": " << volPath; + auto addAlignabelVolume = [](const std::string& volPath, const std::string& symName) -> void { + LOG(info) << "FV0: Add alignable volume: " << symName << " <-> " << volPath; + if (!gGeoManager->SetAlignableEntry(symName.c_str(), volPath.c_str())) { + LOG(fatal) << "FV0: Unable to set alignable entry! 
" << symName << " <-> " << volPath; } - } + }; + + addAlignabelVolume("/cave_1/barrel_1/FV0_1/FV0RIGHT_0", Geometry::getDetectorRightSymName()); + addAlignabelVolume("/cave_1/barrel_1/FV0_1/FV0LEFT_1", Geometry::getDetectorLeftSymName()); } o2::fv0::Hit* Detector::addHit(Int_t trackId, Int_t cellId, diff --git a/Detectors/FIT/macros/CMakeLists.txt b/Detectors/FIT/macros/CMakeLists.txt index e7debb4184325..a6bf1799a5dde 100644 --- a/Detectors/FIT/macros/CMakeLists.txt +++ b/Detectors/FIT/macros/CMakeLists.txt @@ -45,5 +45,9 @@ o2_add_test_root_macro(compareRecPoints.C O2::DataFormatsFIT LABELS fit) +o2_add_test_root_macro(readAlignParam.C + PUBLIC_LINK_LIBRARIES O2::CCDB + LABELS fit) + o2_data_file(COPY readFITDCSdata.C DESTINATION Detectors/FIT/macros/) o2_data_file(COPY readFITDeadChannelMap.C DESTINATION Detectors/FIT/macros/) \ No newline at end of file diff --git a/Detectors/FIT/macros/readAlignParam.C b/Detectors/FIT/macros/readAlignParam.C new file mode 100644 index 0000000000000..c438e7a0c86a5 --- /dev/null +++ b/Detectors/FIT/macros/readAlignParam.C @@ -0,0 +1,51 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file readAlignParam.C +/// \brief ROOT macro for reading geometry alignment parameters +/// +/// \author Andreas Molander + +#if !defined(__CLING__) || defined(__ROOTCLING__) + +#include "CCDB/BasicCCDBManager.h" +#include "DetectorsCommonDataFormats/AlignParam.h" +#include "DetectorsCommonDataFormats/DetID.h" +#include "DetectorsCommonDataFormats/DetectorNameConf.h" + +#include +#include + +#endif + +int readAlignParam(const std::string& detectorName = "FT0", + long timestamp = -1, + const std::string& ccdbUrl = "https://alice-ccdb.cern.ch") +{ + o2::ccdb::BasicCCDBManager& ccdbManager = o2::ccdb::BasicCCDBManager::instance(); + ccdbManager.setURL(ccdbUrl); + ccdbManager.setTimestamp(timestamp); + + const o2::detectors::DetID detID(detectorName.c_str()); + const std::string alignmentPath = o2::base::DetectorNameConf::getAlignmentPath(detID); + const auto alignments = ccdbManager.get>(alignmentPath); + + if (!alignments) { + std::cerr << "No alignment parameters found at " << alignmentPath << std::endl; + return 1; + } + + for (auto alignment : *alignments) { + alignment.print(); + } + + return 0; +} \ No newline at end of file diff --git a/Detectors/FIT/macros/readFT0hits.C b/Detectors/FIT/macros/readFT0hits.C index 14d25fa4a99a8..fafcaac570311 100644 --- a/Detectors/FIT/macros/readFT0hits.C +++ b/Detectors/FIT/macros/readFT0hits.C @@ -1,13 +1,29 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ #if !defined(__CLING__) || defined(__ROOTCLING__) + +#include "DataFormatsFIT/Triggers.h" #include "DataFormatsFT0/Digit.h" #include "DataFormatsFT0/HitType.h" #include "SimulationDataFormat/MCEventHeader.h" #include +#include #include #include #include "DetectorsCommonDataFormats/DetID.h" #include "DetectorsCommonDataFormats/DetectorNameConf.h" +#endif + void readFT0hits() { @@ -24,6 +40,8 @@ void readFT0hits() TH2F* hPel = new TH2F("hPelDig", "N p.e. ", 220, 0, 220, 500, 0, 10000); TH2F* hXYA = new TH2F("hXYA", "X vs Y A side", 400, -20, 20, 400, -20, 20); TH2F* hXYC = new TH2F("hXYC", "X vs Y C side", 400, -20, 20, 400, -20, 20); + TH1F* hZA = new TH1F("hZA", "Z A side", 200, 330, 340); + TH1F* hZC = new TH1F("hZC", "Z C side", 200, -90, -80); gDirectory = cwd; @@ -59,10 +77,13 @@ void readFT0hits() hTimeHitA->Fill(detID, hit_time[detID] - 11.04); hTimeHitC->Fill(detID, hit_time[detID] - 2.91); countE[detID]++; - if (detID < 96) + if (detID < 96) { hXYA->Fill(hit.GetX(), hit.GetY()); - if (detID > 95) + hZA->Fill(hit.GetZ()); + } else { hXYC->Fill(hit.GetX(), hit.GetY()); + hZC->Fill(hit.GetZ()); + } } for (int ii = 0; ii < 220; ii++) { if (countE[ii] > 100) { @@ -82,6 +103,6 @@ void readFT0hits() hMultHit->Write(); hXYA->Write(); hXYC->Write(); - + hZA->Write(); + hZC->Write(); } // end of macro -#endif diff --git a/Detectors/FIT/macros/readFV0hits.C b/Detectors/FIT/macros/readFV0hits.C index 5b0dfa8428dc7..933138fb1434b 100644 --- a/Detectors/FIT/macros/readFV0hits.C +++ b/Detectors/FIT/macros/readFV0hits.C @@ -1,3 +1,14 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + #if !defined(__CLING__) || defined(__ROOTCLING__) #include @@ -22,6 +33,8 @@ #include "DetectorsCommonDataFormats/DetectorNameConf.h" #include "DetectorsCommonDataFormats/DetID.h" +#endif + void AdjustStatBox(TH1* h, float x1ndc, float x2ndc, float y1ndc, float y2ndc) { gPad->Update(); @@ -54,6 +67,9 @@ void InitHistoNames(std::vector& vhName, std::vector& vPdg) vhName.push_back("hElossDet"); vhName.push_back("hEtotVsR"); vhName.push_back("hEtotVsEloss"); + vhName.push_back("hXY"); + vhName.push_back("hXYzoom"); + vhName.push_back("hZ"); for (UInt_t ipdg = 0; ipdg < vPdg.size(); ipdg++) { std::stringstream ss; @@ -63,7 +79,7 @@ void InitHistoNames(std::vector& vhName, std::vector& vPdg) } } -void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) +void readFV0hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) { using namespace o2::detectors; std::string simFName(o2::base::DetectorNameConf::getHitsFileName(DetID::FV0, simPrefix)); @@ -85,6 +101,9 @@ void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) TH2F* hElossDet = new TH2F(vHistoNames.at(8).c_str(), "", nEl, 0, el1, nCells, 0, nCells); TH2F* hEtotVsR = new TH2F(vHistoNames.at(9).c_str(), "", 30000, 0, 300, 80, 0, 80); TH2F* hEtotVsEloss = new TH2F(vHistoNames.at(10).c_str(), "", 30000, 0, 300, nEl, 0, el1); + TH2F* hXY = new TH2F(vHistoNames.at(11).c_str(), "", 200, -100, 100, 200, -100, 100); + TH2F* hXYzoom = new TH2F(vHistoNames.at(12).c_str(), "", 200, -20, 20, 200, -20, 20); + TH1F* hZ = new TH1F(vHistoNames.at(13).c_str(), "", 200, 315, 325); // Setup histo properties hElossDet->SetXTitle("Energy loss [MeV]"); @@ -96,6 +115,14 @@ void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) hEtotVsEloss->SetXTitle("Total energy at entrance [MeV]"); 
hEtotVsEloss->SetYTitle("Energy loss [MeV]"); hEtotVsEloss->SetZTitle("Counts"); + hXY->SetXTitle("X [cm]"); + hXY->SetYTitle("Y [cm]"); + hXY->SetZTitle("Counts"); + hXYzoom->SetXTitle("X [cm]"); + hXYzoom->SetYTitle("Y [cm]"); + hXYzoom->SetZTitle("Counts"); + hZ->SetXTitle("Hit Z-coordinate [cm]"); + hZ->SetYTitle("Counts"); for (UInt_t ih = 0; ih < vhElossVsDistance.size(); ih++) { TH2F* h = vhElossVsDistance.at(ih); std::stringstream ss; @@ -124,6 +151,9 @@ void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) vh.push_back(hEtotVsEloss); vh.insert(vh.end(), vhElossVsDistance.begin(), vhElossVsDistance.end()); vh.insert(vh.end(), vhElossVsEtot.begin(), vhElossVsEtot.end()); + vh.push_back(hXY); + vh.push_back(hXYzoom); + vh.push_back(hZ); for (UInt_t ih = 0; ih < vh.size(); ih++) { vh[ih]->SetDirectory(0); vh[ih]->GetXaxis()->SetTitleSize(fontsize); @@ -177,6 +207,9 @@ void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) vhElossVsDistance.at(vhElossVsDistance.size() - 1)->Fill(hit->GetEnergyLoss() * 1e3, distance); vhElossVsEtot.at(vhElossVsEtot.size() - 1)->Fill(hit->GetEnergyLoss() * 1e3, hit->GetTotalEnergyAtEntrance() * 1e3); } + hXY->Fill(hit->GetX(), hit->GetY()); + hXYzoom->Fill(hit->GetX(), hit->GetY()); + hZ->Fill(hit->GetZ()); } } @@ -323,5 +356,3 @@ int compareFV0Hits(std::string simFName1 = "fv0hit-rawhistos.root", std::string } return 0; } - -#endif From 5a2ecfc89359b70fdc20c4bd05d0cf62c22a889b Mon Sep 17 00:00:00 2001 From: Sergio Garcia <47090312+singiamtel@users.noreply.github.com> Date: Fri, 28 Mar 2025 09:25:38 +0100 Subject: [PATCH 0159/1764] Update tests in Clean PR action (#14119) --- .github/workflows/clean-test.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/clean-test.yml b/.github/workflows/clean-test.yml index cbc524910c33e..0f15301d4eed9 100644 --- a/.github/workflows/clean-test.yml +++ b/.github/workflows/clean-test.yml @@ -19,10 +19,6 @@ name: Clean PR 
checks # Warning: the check_* keys are magic and must consist of the string # "check_" followed by the applicable check name exactly. The # "description" field is only the human-readable label for the input. - 'check_build/O2/o2': - description: build/O2/o2 - type: boolean - default: true 'check_build/AliceO2/O2/o2/macOS': description: build/AliceO2/O2/o2/macOS type: boolean @@ -31,14 +27,10 @@ name: Clean PR checks description: build/AliceO2/O2/o2/macOS-arm type: boolean default: true - 'check_build/O2/fullCI': + 'check_build/O2/fullCI_slc9': description: build/O2/fullCI type: boolean default: true - 'check_build/O2/o2-cs8': - description: build/O2/o2-cs8 - type: boolean - default: true 'check_build/O2/o2-dataflow-cs8': description: build/O2/o2-dataflow-cs8 type: boolean From 1e69f5ebf9c1100d52752410592a9198345bc0a7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 28 Mar 2025 12:45:28 +0100 Subject: [PATCH 0160/1764] GPU TPC: Fix Bz=0 threshold --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 288a24dee5d99..f03964d35ff82 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1740,7 +1740,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.DzDs() = p2.DzDs(); p1.QPt() = p2.QPt(); mergedTrack.SetAlpha(p2.Alpha()); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * gpu_common_constants::kCLight)) { + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.013f * gpu_common_constants::kCLight)) { p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; } From ebf86708921b595c8c7c190e0a6bb0acb058d05e Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Fri, 28 Mar 2025 12:08:28 +0100 Subject: [PATCH 0161/1764] Generator example for quick HepMC extraction from Pythia8 --- .../Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro 
| 44 +++++++++++++++++++ .../Pythia8_HepMC_Wrapper/README.md | 16 +++++++ run/SimExamples/Pythia8_HepMC_Wrapper/run.sh | 19 ++++++++ run/SimExamples/README.md | 1 + 4 files changed, 80 insertions(+) create mode 100644 run/SimExamples/Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro create mode 100644 run/SimExamples/Pythia8_HepMC_Wrapper/README.md create mode 100755 run/SimExamples/Pythia8_HepMC_Wrapper/run.sh diff --git a/run/SimExamples/Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro b/run/SimExamples/Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro new file mode 100644 index 0000000000000..9e60bc0a5d851 --- /dev/null +++ b/run/SimExamples/Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro @@ -0,0 +1,44 @@ +/// \author Marco Giacalone - March 2025 + +// A simple wrapper and demonstrator around Pythia8 for extracting HepMC3 files. + +#include "Pythia8/Pythia.h" +#include "Pythia8Plugins/HepMC3.h" + +using namespace o2::eventgen; + +class HepMC3_Pythia8Wrapper : public GeneratorPythia8 +{ + public: + HepMC3_Pythia8Wrapper(std::string filename = "pythia8.hepmc") : GeneratorPythia8(), mFileName(filename) + { + // HepMC conversion object. + mToHepMC = std::make_unique(); + mToHepMC->setNewFile((filename == "" ? 
"pythia.hepmc" : filename)); + }; + ~HepMC3_Pythia8Wrapper() = default; + + bool importParticles() override + { + // events are written after the importParticles step + // since some filtering is happening there + auto ret = GeneratorPythia8::importParticles(); + if (ret) { + LOG(info) << "Writing event to HepMC3 format"; + mToHepMC->writeNextEvent(mPythia); + } + return ret; + }; + + private: + std::string mFileName = "pythia8.hepmc"; + std::unique_ptr mToHepMC; +}; + +FairGenerator* + hepmc_pythia8(std::string filename = "pythia8.hepmc") +{ + std::cout << "HepMC3_Pythia8Wrapper initialising with filename: " << filename << std::endl; + auto py8 = new HepMC3_Pythia8Wrapper(filename); + return py8; +} diff --git a/run/SimExamples/Pythia8_HepMC_Wrapper/README.md b/run/SimExamples/Pythia8_HepMC_Wrapper/README.md new file mode 100644 index 0000000000000..a334b7b3ef81e --- /dev/null +++ b/run/SimExamples/Pythia8_HepMC_Wrapper/README.md @@ -0,0 +1,16 @@ + + +This example demonstrates how we can extend GeneratorPythia8 in a user-defined macro (or external generator), +to achieve additional HepMC3 export of generated Pythia8 events. + +The example provides a small utility for people who need to obtain HepMC files from Pythia8. +Note that many other methods to achieve this are possible (See original Pythia8 example). + +The example provides: + +- The external generator implementation `Pythia8HepMC3.macro` +- a `run.sh` script demonstrating its usage and a check feeding back the generated hepmc into the simulation + + diff --git a/run/SimExamples/Pythia8_HepMC_Wrapper/run.sh b/run/SimExamples/Pythia8_HepMC_Wrapper/run.sh new file mode 100755 index 0000000000000..16ff80f76b1d4 --- /dev/null +++ b/run/SimExamples/Pythia8_HepMC_Wrapper/run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# +# Script doing Pythia8 event generation and writing these events into HepMC3 files +# (next to generating the usual MCTrack kinematics output). 
+# +# The script also performs a second event generation based on the generated HepMC3 files. +# In principle it should yield identical kinematics files. +# + +NEVENTS=1000 +SEED=11 + +o2-sim -j 1 -g external --configKeyValues 'GeneratorExternal.fileName=Pythia8HepMC3.macro;GeneratorExternal.funcName=hepmc_pythia8("skimmed.hepmc");GeneratorPythia8.config=${O2_ROOT}/share/Generators/egconfig/pythia8_inel.cfg' --seed ${SEED} --noGeant -o pythia8_skimmed -n ${NEVENTS} +o2-sim -j 1 -g external --configKeyValues 'GeneratorExternal.fileName=Pythia8HepMC3.macro;GeneratorExternal.funcName=hepmc_pythia8("unskimmed.hepmc");GeneratorPythia8.config=${O2_ROOT}/share/Generators/egconfig/pythia8_inel.cfg;GeneratorPythia8.includePartonEvent=true' --seed ${SEED} --noGeant -o pythia8_unskimmed -n ${NEVENTS} + +# propagate generated hepmc file; it should produce the same kinematics as the original Pythia8 +o2-sim -j 1 -g hepmc --configKeyValues="GeneratorFileOrCmd.fileNames=skimmed.hepmc" --vertexMode kNoVertex --noGeant -o fromhepmc_skimmed -n ${NEVENTS} --seed ${SEED} +o2-sim -j 1 -g hepmc --configKeyValues="GeneratorFileOrCmd.fileNames=unskimmed.hepmc" --vertexMode kNoVertex --noGeant -o fromhepmc_unskimmed -n ${NEVENTS} --seed ${SEED} diff --git a/run/SimExamples/README.md b/run/SimExamples/README.md index 725d60c4854ca..3a54625acf413 100644 --- a/run/SimExamples/README.md +++ b/run/SimExamples/README.md @@ -6,6 +6,7 @@ n+1 (alpide manual) + toa = (sdel_i * mStep) + 1; break; } } @@ -1146,24 +1140,23 @@ std::vector ITSThresholdCalibrator::calculatePulseParams(const short int& countTot++; } - if (rt_maxdel > rt_mindel && rt_maxdel > 0 && rt_mindel > 0) { - sumRt += rt_maxdel - rt_mindel + mStrobeWindow; - sumSqRt += (rt_maxdel - rt_mindel + mStrobeWindow) * (rt_maxdel - rt_mindel + mStrobeWindow); - countRt++; + if (toa > 0) { + sumToA += toa + float(mStrobeWindow) / 2.; + sumSqToA += (toa + float(mStrobeWindow) / 2.) 
* (toa + float(mStrobeWindow) / 2.); + countToA++; } - rt_mindel = -1; - rt_maxdel = -1; + toa = -1.; tot_maxdel = -1; tot_mindel = -1; } // end loop over col_i } // end loop over chip rows - std::vector output; // {avgRt, rmsRt, avgTot, rmsTot} + std::vector output; // {avgToA, rmsToA, avgTot, rmsTot} // Avg Rt - output.push_back(!countRt ? 0. : (float)sumRt / (float)countRt); + output.push_back(!countToA ? 0. : (float)sumToA / (float)countToA); // Rms Rt - output.push_back(!countRt ? 0. : (std::sqrt((float)sumSqRt / (float)countRt - output[0] * output[0])) * 25.); + output.push_back(!countToA ? 0. : (std::sqrt((float)sumSqToA / (float)countToA - output[0] * output[0])) * 25.); output[0] *= 25.; // Avg ToT output.push_back(!countTot ? 0. : (float)sumTot / (float)countTot); @@ -1728,8 +1721,8 @@ void ITSThresholdCalibrator::addDatabaseEntry( o2::dcs::addConfigItem(this->mTuning, "ChipDbID", std::to_string(confDBid)); o2::dcs::addConfigItem(this->mTuning, "Tot", std::to_string(data[2])); // time over threshold o2::dcs::addConfigItem(this->mTuning, "TotRms", std::to_string(data[3])); // time over threshold rms - o2::dcs::addConfigItem(this->mTuning, "Rt", std::to_string(data[0])); // rise time - o2::dcs::addConfigItem(this->mTuning, "RtRms", std::to_string(data[1])); // rise time rms + o2::dcs::addConfigItem(this->mTuning, "ToA", std::to_string(data[0])); // rise time + o2::dcs::addConfigItem(this->mTuning, "ToARms", std::to_string(data[1])); // rise time rms } //- Pulse shape 2D: avgToT, rmsToT, MTC, rmsMTC, avgMTCD, rmsMTCD, avgMPL, rmsMPL, avgMPLC, rmsMPLC From 24c97f247de4de1b811157c401bd2e9ef80f155b Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 9 Apr 2025 09:23:07 +0200 Subject: [PATCH 0206/1764] DPL: fix setting of run number (#14152) This is actually a workaround. 
The real issue is that timer at the moment are completely outside of the data streaming and therefore do not have access to the DataTakingService, where the proper calculation for the run number happens and it's cached. OK for now. In the future we should make sure that the LifetimeHelpers::enumerate gets a "Streaming" context, not the global one. --- Framework/Core/src/LifetimeHelpers.cxx | 2 +- Framework/Core/test/test_SimpleTimer.cxx | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Framework/Core/src/LifetimeHelpers.cxx b/Framework/Core/src/LifetimeHelpers.cxx index 4621738ec154e..21aa29c1f10e9 100644 --- a/Framework/Core/src/LifetimeHelpers.cxx +++ b/Framework/Core/src/LifetimeHelpers.cxx @@ -423,7 +423,7 @@ ExpirationHandler::Handler LifetimeHelpers::enumerate(ConcreteDataMatcher const& dh.payloadSerializationMethod = gSerializationMethodNone; dh.tfCounter = timestamp; try { - dh.runNumber = atoi(services.get().runNumber.c_str()); + dh.runNumber = strtoull(services.get().device()->fConfig->GetProperty("runNumber", "0").c_str(), nullptr, 10); } catch (...) { dh.runNumber = 0; } diff --git a/Framework/Core/test/test_SimpleTimer.cxx b/Framework/Core/test/test_SimpleTimer.cxx index df935eb6eb2a0..b2b5a4558f961 100644 --- a/Framework/Core/test/test_SimpleTimer.cxx +++ b/Framework/Core/test/test_SimpleTimer.cxx @@ -38,7 +38,10 @@ std::vector defineDataProcessing(ConfigContext const&) InputSpec{"atimer", "TST", "TIMER", 0, Lifetime::Timer}}, {}, AlgorithmSpec{ - adaptStateless([](ControlService& control) { + adaptStateless([](ControlService& control, InputRecord& inputs) { + DataRef ref = inputs.get("atimer"); + auto* header = o2::header::get(ref.header); + LOG(info) << "Run number: " << header->runNumber; // This is invoked autonomously by the timer. 
control.readyToQuit(QuitRequest::Me); })}}, From 26402846cfd9e7dfb2bfe3f7ff09345ce794aa5e Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 9 Apr 2025 21:45:14 +0200 Subject: [PATCH 0207/1764] DPL: set run number also on EoS (#14158) --- .../Core/src/ExternalFairMQDeviceProxy.cxx | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx index 449beb0cb8c0b..e67e484f7faf5 100644 --- a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx +++ b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx @@ -397,6 +397,11 @@ void injectMissingData(fair::mq::Device& device, fair::mq::Parts& parts, std::ve } std::string missing = ""; bool showAlarm = false; + uint32_t runNumber = 0; + try { + runNumber = strtoul(device.fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) { + } for (auto mi : unmatchedDescriptions) { auto& spec = routes[mi].matcher; missing += " " + DataSpecUtils::describe(spec); @@ -412,6 +417,7 @@ void injectMissingData(fair::mq::Device& device, fair::mq::Parts& parts, std::ve dh.dataDescription = concrete.description; dh.subSpecification = *subSpec; dh.payloadSize = 0; + dh.runNumber = runNumber; dh.splitPayloadParts = 0; dh.splitPayloadIndex = 0; dh.payloadSerializationMethod = header::gSerializationMethodNone; @@ -504,7 +510,8 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL LOG(error) << "unexpected nullptr found. 
Skipping message pair."; continue; } - const auto dh = o2::header::get(parts.At(msgidx)->GetData()); + auto* header = parts.At(msgidx)->GetData(); + const auto dh = o2::header::get(header); if (!dh) { LOG(error) << "data on input " << msgidx << " does not follow the O2 data model, DataHeader missing"; if (msgidx > 0) { @@ -512,7 +519,7 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL } continue; } - auto dph = o2::header::get(parts.At(msgidx)->GetData()); + auto dph = o2::header::get(header); if (!dph) { LOG(error) << "data on input " << msgidx << " does not follow the O2 data model, DataProcessingHeader missing"; continue; @@ -527,7 +534,7 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL timingInfo.runNumber = dh->runNumber; timingInfo.tfCounter = dh->tfCounter; LOG(debug) << msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); - if (dh->runNumber == 0 || dh->tfCounter == 0 || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { + if (dh->runNumber == 0 || (dh->tfCounter == 0 && o2::header::get(header) == nullptr) || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { LOG(error) << "INVALID runNumber / tfCounter: runNumber " << dh->runNumber << ", tfCounter " << dh->tfCounter << ", FMQ runNumber " << fmqRunNumber << " for msgidx " << msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); @@ -623,6 +630,11 @@ InjectorFunction incrementalConverter(OutputSpec const& spec, o2::header::Serial auto timesliceId = std::make_shared(startTime); return [timesliceId, spec, step, method](TimingInfo&, ServiceRegistryRef const& services, fair::mq::Parts& parts, 
ChannelRetriever channelRetriever, size_t newTimesliceId, bool&) { auto* device = services.get().device(); + uint32_t runNumber = 0; + try { + runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) { + } // We iterate on all the parts and we send them two by two, // adding the appropriate O2 header. for (int i = 0; i < parts.Size(); ++i) { @@ -635,6 +647,7 @@ InjectorFunction incrementalConverter(OutputSpec const& spec, o2::header::Serial dh.dataDescription = matcher.description; dh.subSpecification = matcher.subSpec; dh.payloadSize = parts.At(i)->GetSize(); + dh.runNumber = runNumber; DataProcessingHeader dph{newTimesliceId, 0}; if (*timesliceId != newTimesliceId) { @@ -977,11 +990,18 @@ DataProcessorSpec specifyFairMQDeviceOutputProxy(char const* name, if (channelName != outputChannelName) { continue; } + + uint32_t runNumber = 0; + try { + runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) { + } DataHeader dh; dh.dataOrigin = "DPL"; dh.dataDescription = "EOS"; dh.subSpecification = 0; dh.payloadSize = 0; + dh.runNumber = runNumber; dh.payloadSerializationMethod = o2::header::gSerializationMethodNone; dh.tfCounter = 0; dh.firstTForbit = 0; @@ -1091,12 +1111,18 @@ DataProcessorSpec specifyFairMQDeviceMultiOutputProxy(char const* name, if (!checkChannel(channelName)) { continue; } + uint32_t runNumber = 0; + try { + runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) 
{ + } DataHeader dh; dh.dataOrigin = "DPL"; dh.dataDescription = "EOS"; dh.subSpecification = 0; dh.payloadSize = 0; dh.payloadSerializationMethod = o2::header::gSerializationMethodNone; + dh.runNumber = runNumber; dh.tfCounter = 0; dh.firstTForbit = 0; SourceInfoHeader sih; From fcdf98dec8cf81dbaa3e982e4b4a528d4b059762 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Thu, 10 Apr 2025 08:52:42 +0200 Subject: [PATCH 0208/1764] ITS3: Fix matrix generation in helper class (#13895) * ITS: change layer name to debug severity Signed-off-by: Felix Schlepper * ITS: ITS3 matrix generation fix + demoting&removal of logging Signed-off-by: Felix Schlepper * ITS3: Propagate rename to ITS helper class Signed-off-by: Felix Schlepper * ITS3: make ITSRESPONSE cached var Allows reusing this variable later on. Signed-off-by: Felix Schlepper * ITS: digiparams make print func virtual Signed-off-by: Felix Schlepper --------- Signed-off-by: Felix Schlepper --- .../ITSMFT/ITS/base/src/GeometryTGeo.cxx | 100 ++++++++---------- .../ITSMFT/ITS/simulation/src/Detector.cxx | 6 +- .../data/AlpideResponseData/CMakeLists.txt | 4 +- .../include/ITSMFTSimulation/DigiParams.h | 20 ++-- 4 files changed, 58 insertions(+), 72 deletions(-) diff --git a/Detectors/ITSMFT/ITS/base/src/GeometryTGeo.cxx b/Detectors/ITSMFT/ITS/base/src/GeometryTGeo.cxx index b52fd8f58320f..89b4d63729543 100644 --- a/Detectors/ITSMFT/ITS/base/src/GeometryTGeo.cxx +++ b/Detectors/ITSMFT/ITS/base/src/GeometryTGeo.cxx @@ -24,8 +24,6 @@ #ifdef ENABLE_UPGRADES #include "ITS3Base/SpecsV2.h" -#include "ITS3Base/SegmentationSuperAlpide.h" -using SuperSegmentation = o2::its3::SegmentationSuperAlpide; #endif #include // for TGeoBBox @@ -420,33 +418,20 @@ TGeoHMatrix* GeometryTGeo::extractMatrixSensor(int index) const static int chipInGlo{0}; // account for the difference between physical sensitive layer (where charge collection is simulated) and effective sensor thicknesses + // in the ITS3 case this accounted by specialized 
functions double delta = Segmentation::SensorLayerThickness - Segmentation::SensorLayerThicknessEff; -#ifdef ENABLE_UPGRADES - if (mIsLayerITS3[getLayer(index)]) { - delta = its3::SegmentationSuperAlpide::mSensorLayerThickness - its3::SegmentationSuperAlpide::mSensorLayerThicknessEff; - } -#endif - static TGeoTranslation tra(0., 0.5 * delta, 0.); - +#ifdef ENABLE_UPGRADES // only apply for non ITS3 OB layers + if (!mIsLayerITS3[getLayer(index)]) { + matTmp *= tra; + } +#else matTmp *= tra; +#endif return &matTmp; } -//__________________________________________________________________________ -const o2::math_utils::Transform3D GeometryTGeo::getT2LMatrixITS3(int isn, float alpha) -{ - // create for sensor isn the TGeo matrix for Tracking to Local frame transformations - static TGeoHMatrix t2l; - t2l.Clear(); - t2l.RotateZ(alpha * RadToDeg()); // rotate in direction of normal to the tangent to the cylinder - const TGeoHMatrix& matL2G = getMatrixL2G(isn); - const auto& matL2Gi = matL2G.Inverse(); - t2l.MultiplyLeft(&matL2Gi); - return Mat3D(t2l); -} - //__________________________________________________________________________ void GeometryTGeo::Build(int loadTrans) { @@ -492,23 +477,6 @@ void GeometryTGeo::Build(int loadTrans) mLastChipIndex[i] = numberOfChips - 1; } - LOGP(debug, "Summary of extracted Geometry:"); - LOGP(debug, " There are {} Layers and {} HalfBarrels", mNumberOfLayers, mNumberOfHalfBarrels); - for (int i = 0; i < mNumberOfLayers; i++) { - LOGP(debug, " Layer {}: {:*^30}", i, "START"); - LOGP(debug, " - mNumberOfStaves={}", mNumberOfStaves[i]); - LOGP(debug, " - mNumberOfChipsPerStave={}", mNumberOfChipsPerStave[i]); - LOGP(debug, " - mNumberOfHalfStaves={}", mNumberOfHalfStaves[i]); - LOGP(debug, " - mNumberOfChipsPerHalfStave={}", mNumberOfChipsPerHalfStave[i]); - LOGP(debug, " - mNumberOfModules={}", mNumberOfModules[i]); - LOGP(debug, " - mNumberOfChipsPerModules={}", mNumberOfChipsPerModule[i]); - LOGP(debug, " - mNumberOfChipsPerLayer={}", 
mNumberOfChipsPerLayer[i]); - LOGP(debug, " - mNumberOfChipsPerHalfBarrel={}", mNumberOfChipsPerHalfBarrel[i]); - LOGP(debug, " - mLastChipIndex={}", mLastChipIndex[i]); - LOGP(debug, " Layer {}: {:*^30}", i, "END"); - } - LOGP(debug, "In total there {} chips registered", numberOfChips); - #ifdef ENABLE_UPGRADES if (std::any_of(mIsLayerITS3.cbegin(), mIsLayerITS3.cend(), [](auto b) { return b; })) { LOGP(info, "Found active IT3 layers -> Renaming Detector ITS to IT3"); @@ -880,34 +848,39 @@ void GeometryTGeo::extractSensorXAlpha(int isn, float& x, float& alp) const TGeoHMatrix* matL2G = extractMatrixSensor(isn); double locA[3] = {-100., 0., 0.}, locB[3] = {100., 0., 0.}, gloA[3], gloB[3]; - int iLayer = getLayer(isn); + double xp{0}, yp{0}; #ifdef ENABLE_UPGRADES - if (mIsLayerITS3[iLayer]) { - // We need to calcualte the line tangent at the mid-point in the geometry + if (int iLayer = getLayer(isn); mIsLayerITS3[iLayer]) { + // For a TGeoTubeSeg the local coordinate system is defined at the origin + // of the circle of the side, since in our implementation we rotated the geometry a bit const auto radius = o2::its3::constants::radii[iLayer]; const auto phi1 = o2::its3::constants::tile::width / radius; const auto phi2 = o2::its3::constants::pixelarray::width / radius + phi1; const auto phi3 = (phi2 - phi1) / 2.; // mid-point in phi - const auto x = radius * std::cos(phi3); - const auto y = radius * std::sin(phi3); - // For the tangent we make the parametric line equation y = m * x - c - const auto m = x / y; - const auto c = y - m * x; - // Now we can given any x calulate points along this line, we pick points far away, - // the calculation of the normal should work then below. 
- locA[1] = m * locA[0] + c; - locB[1] = m * locB[0] + c; - } -#endif - + locA[0] = radius * std::cos(phi3); + locA[1] = radius * std::sin(phi3); + matL2G->LocalToMaster(locA, gloA); + xp = gloA[0]; + yp = gloA[1]; + } else { + matL2G->LocalToMaster(locA, gloA); + matL2G->LocalToMaster(locB, gloB); + double dx = gloB[0] - gloA[0], dy = gloB[1] - gloA[1]; + double t = (gloB[0] * dx + gloB[1] * dy) / (dx * dx + dy * dy); + xp = gloB[0] - dx * t; + yp = gloB[1] - dy * t; + } +#else // just ITS2 part matL2G->LocalToMaster(locA, gloA); matL2G->LocalToMaster(locB, gloB); double dx = gloB[0] - gloA[0], dy = gloB[1] - gloA[1]; double t = (gloB[0] * dx + gloB[1] * dy) / (dx * dx + dy * dy); - double xp = gloB[0] - dx * t, yp = gloB[1] - dy * t; - x = Sqrt(xp * xp + yp * yp); - alp = ATan2(yp, xp); + xp = gloB[0] - dx * t; + yp = gloB[1] - dy * t; +#endif + x = std::hypot(xp, yp); + alp = std::atan2(yp, xp); o2::math_utils::bringTo02Pi(alp); } @@ -926,6 +899,19 @@ TGeoHMatrix& GeometryTGeo::createT2LMatrix(int isn) return t2l; } +//__________________________________________________________________________ +const o2::math_utils::Transform3D GeometryTGeo::getT2LMatrixITS3(int isn, float alpha) +{ + // create for sensor isn the TGeo matrix for Tracking to Local frame transformations with correction for effective thickness + static TGeoHMatrix t2l; + t2l.Clear(); + t2l.RotateZ(alpha * RadToDeg()); // rotate in direction of normal to the tangent to the cylinder + const TGeoHMatrix& matL2G = getMatrixL2G(isn); + const auto& matL2Gi = matL2G.Inverse(); + t2l.MultiplyLeft(&matL2Gi); + return Mat3D(t2l); +} + //__________________________________________________________________________ int GeometryTGeo::extractVolumeCopy(const char* name, const char* prefix) const { diff --git a/Detectors/ITSMFT/ITS/simulation/src/Detector.cxx b/Detectors/ITSMFT/ITS/simulation/src/Detector.cxx index bf2e997794ee4..2304a9102092a 100644 --- a/Detectors/ITSMFT/ITS/simulation/src/Detector.cxx +++ 
b/Detectors/ITSMFT/ITS/simulation/src/Detector.cxx @@ -190,7 +190,7 @@ Detector::Detector(Bool_t active, TString name) } else { mLayerName[j].Form("%s%d", GeometryTGeo::getITSSensorPattern(), j); // See V3Layer } - LOGP(info, "{}: mLayerName={}", j, mLayerName[j].Data()); + LOGP(debug, "{}: mLayerName={}", j, mLayerName[j].Data()); } if (mNumberLayers > 0) { // if not, we'll Fatal-ize in CreateGeometry @@ -723,8 +723,8 @@ void Detector::defineLayer(Int_t nlay, Double_t phi0, Double_t r, Int_t nstav, I // Return: // none. - LOG(info) << "L# " << nlay << " Phi:" << phi0 << " R:" << r << " Nst:" << nstav << " Nunit:" << nunit - << " Lthick:" << lthick << " Dthick:" << dthick << " DetID:" << dettypeID << " B:" << buildLevel; + LOG(debug) << "L# " << nlay << " Phi:" << phi0 << " R:" << r << " Nst:" << nstav << " Nunit:" << nunit + << " Lthick:" << lthick << " Dthick:" << dthick << " DetID:" << dettypeID << " B:" << buildLevel; if (nlay >= mNumberLayers || nlay < 0) { LOG(error) << "Wrong layer number " << nlay; diff --git a/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt b/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt index 381e4f4b54c01..d1f3e756394b1 100644 --- a/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt +++ b/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt @@ -20,10 +20,10 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${CMAKE_SOURCE_DI if(ITSRESPONSE) message(STATUS "ITSRESPONSE option provided, setting ITSRESPONSE_DIR from it: " ${ITSRESPONSE}) - set(ITSRESPONSE_DIR ${ITSRESPONSE}) + set(ITSRESPONSE_DIR ${ITSRESPONSE} CACHE PATH "ITSResponse directory") else() message(STATUS "ITSRESPONSE option not provided, setting ITSRESPONSE_DIR from environment ITSRESPONSE_ROOT: " $ENV{ITSRESPONSE_ROOT}) - set(ITSRESPONSE_DIR $ENV{ITSRESPONSE_ROOT}) + set(ITSRESPONSE_DIR $ENV{ITSRESPONSE_ROOT} CACHE PATH "ITSResponse directory") endif() add_custom_command(TARGET 
O2exe-alpide-response-generator POST_BUILD diff --git a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/DigiParams.h b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/DigiParams.h index 7772c47237ae8..b27739c26bc4d 100644 --- a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/DigiParams.h +++ b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/DigiParams.h @@ -96,18 +96,18 @@ class DigiParams const SignalShape& getSignalShape() const { return mSignalShape; } SignalShape& getSignalShape() { return (SignalShape&)mSignalShape; } - void print() const; + virtual void print() const; private: static constexpr double infTime = 1e99; - bool mIsContinuous = false; ///< flag for continuous simulation - float mNoisePerPixel = 1.e-8; ///< ALPIDE Noise per chip - int mROFrameLengthInBC = 0; ///< ROF length in BC for continuos mode - float mROFrameLength = 0; ///< length of RO frame in ns - float mStrobeDelay = 0.; ///< strobe start (in ns) wrt ROF start - float mStrobeLength = 0; ///< length of the strobe in ns (sig. over threshold checked in this window only) - double mTimeOffset = -2 * infTime; ///< time offset (in seconds!) to calculate ROFrame from hit time - int mROFrameBiasInBC = 0; ///< misalignment of the ROF start in BC + bool mIsContinuous = false; ///< flag for continuous simulation + float mNoisePerPixel = 1.e-8; ///< ALPIDE Noise per chip + int mROFrameLengthInBC = 0; ///< ROF length in BC for continuos mode + float mROFrameLength = 0; ///< length of RO frame in ns + float mStrobeDelay = 0.; ///< strobe start (in ns) wrt ROF start + float mStrobeLength = 0; ///< length of the strobe in ns (sig. over threshold checked in this window only) + double mTimeOffset = -2 * infTime; ///< time offset (in seconds!) 
to calculate ROFrame from hit time + int mROFrameBiasInBC = 0; ///< misalignment of the ROF start in BC int mChargeThreshold = 150; ///< charge threshold in Nelectrons int mMinChargeToAccount = 15; ///< minimum charge contribution to account int mNSimSteps = 7; ///< number of steps in response simulation @@ -125,7 +125,7 @@ class DigiParams float mROFrameLengthInv = 0; ///< inverse length of RO frame in ns float mNSimStepsInv = 0; ///< its inverse - ClassDefNV(DigiParams, 2); + ClassDef(DigiParams, 2); }; } // namespace itsmft } // namespace o2 From 04b2596a380cba6b4a5aae332ebace594231e21b Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 09:13:35 +0200 Subject: [PATCH 0209/1764] DPL: do not compute GUI metrics if we are in online mode (#14163) --- Framework/Core/src/CommonServices.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index e13f1cb2094b7..9d30dac8c4de8 100644 --- a/Framework/Core/src/CommonServices.cxx +++ b/Framework/Core/src/CommonServices.cxx @@ -848,7 +848,9 @@ auto flushMetrics(ServiceRegistryRef registry, DataProcessingStats& stats) -> vo } monitoring.send(std::move(metric)); }); - relayer.sendContextState(); + if (DefaultsHelpers::onlineDeploymentMode() == false) { + relayer.sendContextState(); + } monitoring.flushBuffer(); O2_SIGNPOST_END(monitoring_service, sid, "flush", "done flushing metrics"); }; From 0a831b2817e686e14d9b5129f6fd0fc45ea035db Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 14:28:28 +0200 Subject: [PATCH 0210/1764] DPL: extend DataRefUtils::match to support multiple headers (#14160) --- .../Core/include/Framework/DataRefUtils.h | 23 ++++++++++++++----- .../include/Framework/InputRecordWalker.h | 5 ++-- .../include/DPLUtils/DPLRawPageSequencer.h | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git 
a/Framework/Core/include/Framework/DataRefUtils.h b/Framework/Core/include/Framework/DataRefUtils.h index 4c1bd0ed7ed10..d50699badc63b 100644 --- a/Framework/Core/include/Framework/DataRefUtils.h +++ b/Framework/Core/include/Framework/DataRefUtils.h @@ -11,6 +11,7 @@ #ifndef O2_FRAMEWORK_DATAREFUTILS_H_ #define O2_FRAMEWORK_DATAREFUTILS_H_ +#include "Framework/DataDescriptorMatcher.h" #include "Framework/DataRef.h" #include "Framework/RootSerializationSupport.h" #include "Framework/SerializationMethods.h" @@ -33,6 +34,9 @@ class ConfigurableParam; namespace o2::framework { +template +concept DataHeaderLike = requires(H& dh) {dh.dataOrigin; dh.dataDescription; dh.subSpecification; }; + // FIXME: Should enforce the fact that DataRefs are read only... struct DataRefUtils { @@ -52,7 +56,7 @@ struct DataRefUtils { if ((payloadSize % sizeof(T)) != 0) { throw runtime_error("Cannot extract POD from message as size do not match"); } - //FIXME: provide a const collection + // FIXME: provide a const collection return gsl::span(reinterpret_cast(const_cast(ref.payload)), payloadSize / sizeof(T)); } else if constexpr (has_root_dictionary::value == true && is_messageable::value == false) { @@ -220,17 +224,24 @@ struct DataRefUtils { return ref.spec != nullptr && ref.spec->binding == binding; } - /// check if the O2 message referred by DataRef matches a particular - /// input spec. The DataHeader is retrieved from the header message and matched - /// against @ref spec parameter. - static bool match(DataRef const& ref, InputSpec const& spec) + template + static bool matchHeader(DataRef const& ref, InputSpec const& spec) { - auto dh = DataRefUtils::getHeader(ref); + auto const* dh = o2::header::get(ref.header); if (dh == nullptr) { return false; } return DataSpecUtils::match(spec, dh->dataOrigin, dh->dataDescription, dh->subSpecification); } + + /// check if the O2 message referred by DataRef matches a particular + /// input spec. 
The DataHeader is retrieved from the header message and matched + /// against @ref spec parameter. + template + static bool match(DataRef const& ref, InputSpec const& spec) + { + return (DataRefUtils::matchHeader(ref, spec) || ... || matchHeader(ref, spec)); + } }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/InputRecordWalker.h b/Framework/Core/include/Framework/InputRecordWalker.h index a67a7dfb04820..4d36a1f17bc82 100644 --- a/Framework/Core/include/Framework/InputRecordWalker.h +++ b/Framework/Core/include/Framework/InputRecordWalker.h @@ -12,11 +12,11 @@ #define FRAMEWORK_INPUTRECORDWALKER_H /// @file InputRecordWalker.h -/// @author Matthias Richter /// @since 2020-03-25 /// @brief A helper class to iteratate over all parts of all input routes #include "Framework/InputRecord.h" +#include "Framework/DataRefUtils.h" namespace o2::framework { @@ -49,6 +49,7 @@ namespace o2::framework /// for (auto const& ref : InputRecordWalker(inputs, filter)) { /// // do something with the data /// } +template class InputRecordWalker { public: @@ -131,7 +132,7 @@ class InputRecordWalker if (mFilterSpecs.size() > 0) { bool isSelected = false; for (auto const& spec : mFilterSpecs) { - if ((isSelected = DataRefUtils::match(*mCurrent, spec)) == true) { + if ((isSelected = DataRefUtils::match(*mCurrent, spec)) == true) { break; } } diff --git a/Framework/Utils/include/DPLUtils/DPLRawPageSequencer.h b/Framework/Utils/include/DPLUtils/DPLRawPageSequencer.h index 785dc9e04bd45..2fb8374e45c12 100644 --- a/Framework/Utils/include/DPLUtils/DPLRawPageSequencer.h +++ b/Framework/Utils/include/DPLUtils/DPLRawPageSequencer.h @@ -191,7 +191,7 @@ class DPLRawPageSequencer } private: - InputRecordWalker mInput; + InputRecordWalker<> mInput; template void forwardInternal(Predicate pred, Inserter inserter, const char* data, size_t size, const o2::header::DataHeader* dh) From b78b50c251ad7dca06d844c4c4860954b08b113b Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: 
Thu, 10 Apr 2025 15:28:11 +0200 Subject: [PATCH 0211/1764] ctpdev: orbitreset and orbitsox via zmq II (#14147) * dev: finishing orbitsox and orbitreset via zmq * clang * fix * fix * dev: removed infologger and using LOG(important) * clang * fix --- .../include/CTPWorkflowScalers/RunManager.h | 1 + .../CTP/workflowScalers/src/RunManager.cxx | 39 ++++++++++++++++++- .../workflowScalers/src/ctpCCDBManager.cxx | 5 ++- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h index e0b204e6c4ce5..72fb9c2056367 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h +++ b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h @@ -18,6 +18,7 @@ #include "DataFormatsCTP/Configuration.h" #include "BookkeepingApi/BkpClientFactory.h" #include "BookkeepingApi/BkpClient.h" + using namespace o2::bkp::api; namespace o2 { diff --git a/Detectors/CTP/workflowScalers/src/RunManager.cxx b/Detectors/CTP/workflowScalers/src/RunManager.cxx index ac3eda60094e9..5d0b906e28088 100644 --- a/Detectors/CTP/workflowScalers/src/RunManager.cxx +++ b/Detectors/CTP/workflowScalers/src/RunManager.cxx @@ -18,6 +18,7 @@ #include #include "CommonUtils/StringUtils.h" #include + using namespace o2::ctp; /// /// Active run to keep cfg and saclers of active runs @@ -232,10 +233,44 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message return 0; } if (topic.find("soxorbit") != std::string::npos) { - return 0; + std::vector tokens = o2::utils::Str::tokenize(message, ' '); + int ret = 0; + if (tokens.size() == 3) { + long timestamp = std::stol(tokens[0]); + uint32_t runnumber = std::stoul(tokens[1]); + uint32_t orbit = std::stoul(tokens[2]); + ret = saveSoxOrbit(runnumber, orbit, timestamp); + std::string logmessage; + if (ret) { + logmessage = "Failed to update CCDB with SOX orbit."; + } else { 
+ logmessage = "CCDB updated with SOX orbit."; + } + LOG(important) << logmessage << " run:" << runnumber << " sox orbit:" << orbit << " ts:" << timestamp; + } else { + LOG(error) << "Topic soxorbit dize !=3: " << message << " token size:" << tokens.size(); + ret = 1; + } + return ret; } if (topic.find("orbitreset") != std::string::npos) { - return 0; + std::vector tokens = o2::utils::Str::tokenize(message, ' '); + int ret = 0; + if (tokens.size() == 1) { + long timestamp = std::stol(tokens[0]); + ret = saveOrbitReset(timestamp); + std::string logmessage; + if (ret) { + logmessage = "Failed to update CCDB with orbitreset. "; + } else { + logmessage = "CCDB updated with orbitreset. "; + } + LOG(important) << logmessage << timestamp; + } else { + LOG(error) << "Topic orbit reset != 2: " << message << " token size:" << tokens.size(); + ret = 1; + } + return ret; } static int nerror = 0; if (topic.find("sox") != std::string::npos) { diff --git a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx index 0d81b896b3e91..cbe8fe5dd675f 100644 --- a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx +++ b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx @@ -122,7 +122,7 @@ int ctpCCDBManager::saveSoxOrbit(uint32_t runNumber, uint32_t soxOrbit, long tim vect.push_back(timestamp); vect.push_back((uint64_t)runNumber); vect.push_back((uint64_t)soxOrbit); - long tmin = timestamp; + long tmin = timestamp / 1000; long tmax = tmin + 381928219; o2::ccdb::CcdbApi api; map metadata; // can be empty @@ -149,9 +149,10 @@ int ctpCCDBManager::saveOrbitReset(long timeStamp) if (timeStamp == 0) { auto now = std::chrono::system_clock::now(); timeStamp = std::chrono::duration_cast(now.time_since_epoch()).count(); + LOG(warn) << "Received timestamp = 0 , using current time:" << timeStamp; } vect.push_back(timeStamp); - long tmin = timeStamp; + long tmin = timeStamp / 1000; long tmax = tmin + 381928219; o2::ccdb::CcdbApi api; map 
metadata; // can be empty From b17041de14862c743f6a883c8cc66d7743295916 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 9 Apr 2025 16:05:59 +0200 Subject: [PATCH 0212/1764] Possibility to request TPC occupancy map w/o askig for clusters --- .../DataFormatsGlobalTracking/RecoContainer.h | 2 ++ .../GlobalTracking/src/RecoContainer.cxx | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h b/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h index d128467168c92..31d531ef19265 100644 --- a/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h +++ b/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h @@ -225,6 +225,7 @@ struct DataRequest { void requestITSClusters(bool mc); void requestMFTClusters(bool mc); void requestTPCClusters(bool mc); + void requestTPCOccMap(); void requestTPCTriggers(); void requestTOFClusters(bool mc); void requestTRDTracklets(bool mc); @@ -377,6 +378,7 @@ struct RecoContainer { void addITSClusters(o2::framework::ProcessingContext& pc, bool mc); void addMFTClusters(o2::framework::ProcessingContext& pc, bool mc); void addTPCClusters(o2::framework::ProcessingContext& pc, bool mc, bool shmap, bool occmap); + void addTPCOccMap(o2::framework::ProcessingContext& pc); void addTPCTriggers(o2::framework::ProcessingContext& pc); void addTOFClusters(o2::framework::ProcessingContext& pc, bool mc); void addHMPClusters(o2::framework::ProcessingContext& pc, bool mc); diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index c26de2bfda896..39cc05d8a69e7 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -123,7 +123,7 @@ void DataRequest::requestTPCTracks(bool mc) 
addInput({"trackTPCClRefs", "TPC", "CLUSREFS", 0, Lifetime::Timeframe}); if (requestMap.find("clusTPC") != requestMap.end()) { addInput({"clusTPCshmap", "TPC", "CLSHAREDMAP", 0, Lifetime::Timeframe}); - addInput({"clusTPCoccmap", "TPC", "TPCOCCUPANCYMAP", 0, Lifetime::Timeframe}); + requestTPCOccMap(); } if (mc) { addInput({"trackTPCMCTR", "TPC", "TRACKSMCLBL", 0, Lifetime::Timeframe}); @@ -267,6 +267,12 @@ void DataRequest::requestMFTClusters(bool mc) requestMap["clusMFT"] = mc; } +void DataRequest::requestTPCOccMap() +{ + addInput({"clusTPCoccmap", "TPC", "TPCOCCUPANCYMAP", 0, Lifetime::Timeframe}); + requestMap["TPCOcc"] = false; +} + void DataRequest::requestTPCClusters(bool mc) { addInput({"clusTPC", ConcreteDataTypeMatcher{"TPC", "CLUSTERNATIVE"}, Lifetime::Timeframe}); @@ -275,7 +281,7 @@ void DataRequest::requestTPCClusters(bool mc) } if (requestMap.find("trackTPC") != requestMap.end()) { addInput({"clusTPCshmap", "TPC", "CLSHAREDMAP", 0, Lifetime::Timeframe}); - addInput({"clusTPCoccmap", "TPC", "TPCOCCUPANCYMAP", 0, Lifetime::Timeframe}); + requestTPCOccMap(); } if (mc) { addInput({"clusTPCMC", ConcreteDataTypeMatcher{"TPC", "CLNATIVEMCLBL"}, Lifetime::Timeframe}); @@ -704,10 +710,17 @@ void RecoContainer::collectData(ProcessingContext& pc, const DataRequest& reques addMFTClusters(pc, req->second); } + req = reqMap.find("TPCOcc"); + bool TPCOccDone = false; + if (req != reqMap.end()) { + TPCOccDone = true; + addTPCOccMap(pc); + } + req = reqMap.find("clusTPC"); if (req != reqMap.end()) { auto tracksON = reqMap.find("trackTPC") != reqMap.end(); - addTPCClusters(pc, req->second, tracksON, tracksON); + addTPCClusters(pc, req->second, tracksON, tracksON && (!TPCOccDone)); } req = reqMap.find("trigTPC"); @@ -1100,6 +1113,12 @@ void RecoContainer::addMFTClusters(ProcessingContext& pc, bool mc) } } +//__________________________________________________________ +void RecoContainer::addTPCOccMap(ProcessingContext& pc) +{ + occupancyMapTPC = 
pc.inputs().get>("clusTPCoccmap"); +} + //__________________________________________________________ void RecoContainer::addTPCClusters(ProcessingContext& pc, bool mc, bool shmap, bool occmap) { From 276c3223609bbc50a8609a4131157a84f89c7e98 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 9 Apr 2025 16:07:10 +0200 Subject: [PATCH 0213/1764] ITSTPC QC does not need TPC clusters, just occ. --- Detectors/GLOQC/src/MatchITSTPCQC.cxx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Detectors/GLOQC/src/MatchITSTPCQC.cxx b/Detectors/GLOQC/src/MatchITSTPCQC.cxx index 86de9cd9c056e..e1832056f072c 100644 --- a/Detectors/GLOQC/src/MatchITSTPCQC.cxx +++ b/Detectors/GLOQC/src/MatchITSTPCQC.cxx @@ -470,7 +470,7 @@ void MatchITSTPCQC::initDataRequest() if (mDoK0QC) { mDataRequest->requestPrimaryVertices(mUseMC); mDataRequest->requestSecondaryVertices(mUseMC); - mDataRequest->requestTPCClusters(false); + mDataRequest->requestTPCOccMap(); } } @@ -478,7 +478,6 @@ void MatchITSTPCQC::initDataRequest() void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) { - // Getting the B field mBz = o2::base::Propagator::Instance()->getNominalBz(); @@ -1058,7 +1057,6 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) } else { mTBinClOcc.resize(1); } - auto v0IDs = mRecoCont.getV0sIdx(); auto nv0 = v0IDs.size(); if (nv0 > mRecoCont.getV0s().size()) { From 4f4b5458db0b1b5f1fcbf8e7c6111ab300d6b370 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 20:44:31 +0200 Subject: [PATCH 0214/1764] DPL: Do not compute metrics if the GUI is not available (#14170) --- Framework/Core/src/CommonServices.cxx | 4 +--- Framework/Core/src/DataRelayer.cxx | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index 9d30dac8c4de8..e13f1cb2094b7 100644 --- a/Framework/Core/src/CommonServices.cxx +++ 
b/Framework/Core/src/CommonServices.cxx @@ -848,9 +848,7 @@ auto flushMetrics(ServiceRegistryRef registry, DataProcessingStats& stats) -> vo } monitoring.send(std::move(metric)); }); - if (DefaultsHelpers::onlineDeploymentMode() == false) { - relayer.sendContextState(); - } + relayer.sendContextState(); monitoring.flushBuffer(); O2_SIGNPOST_END(monitoring_service, sid, "flush", "done flushing metrics"); }; diff --git a/Framework/Core/src/DataRelayer.cxx b/Framework/Core/src/DataRelayer.cxx index f30866dc0aa1b..c6333350d6da7 100644 --- a/Framework/Core/src/DataRelayer.cxx +++ b/Framework/Core/src/DataRelayer.cxx @@ -1034,6 +1034,9 @@ uint64_t DataRelayer::getCreationTimeForSlot(TimesliceSlot slot) void DataRelayer::sendContextState() { + if (!mContext.get().driverHasGUI) { + return; + } std::scoped_lock lock(mMutex); auto& states = mContext.get(); for (size_t ci = 0; ci < mTimesliceIndex.size(); ++ci) { From 8688938bd04ddeb7227b43ff136d39f463b60e37 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 20:46:13 +0200 Subject: [PATCH 0215/1764] DataSampling: make sure the original DataHeader is available in the sampled data (#14164) --- .../include/DataSampling/DataSamplingHeader.h | 13 ++++-- .../include/DataSampling/Dispatcher.h | 4 +- .../DataSampling/src/DataSamplingHeader.cxx | 13 +++--- Utilities/DataSampling/src/Dispatcher.cxx | 7 ++-- .../test/test_DataSamplingHeader.cxx | 42 +++++++++++-------- 5 files changed, 47 insertions(+), 32 deletions(-) diff --git a/Utilities/DataSampling/include/DataSampling/DataSamplingHeader.h b/Utilities/DataSampling/include/DataSampling/DataSamplingHeader.h index adc2c97759f52..0cc96e2125dec 100644 --- a/Utilities/DataSampling/include/DataSampling/DataSamplingHeader.h +++ b/Utilities/DataSampling/include/DataSampling/DataSamplingHeader.h @@ -41,8 +41,15 @@ struct DataSamplingHeader : public header::BaseHeader { uint32_t totalEvaluatedMessages = 0; DeviceIDType deviceID = ""; - 
DataSamplingHeader(); - DataSamplingHeader(uint64_t _sampleTimeUs, uint32_t _totalAcceptedMessages, uint32_t _totalEvaluatedMessages, DeviceIDType _deviceID); + /// Presampled description for the data. Copied from the original DataHeader. + header::DataDescription dataDescription; + /// Presampled origin for the data. Copied from the original DataHeader. + header::DataOrigin dataOrigin; + /// Presampled subSpecification for the data. + header::DataHeader::SubSpecificationType subSpecification; + + DataSamplingHeader() = delete; + DataSamplingHeader(uint64_t _sampleTimeUs, uint32_t _totalAcceptedMessages, uint32_t _totalEvaluatedMessages, DeviceIDType _deviceID, header::DataHeader const& original); DataSamplingHeader(const DataSamplingHeader&) = default; DataSamplingHeader& operator=(const DataSamplingHeader&) = default; @@ -51,4 +58,4 @@ struct DataSamplingHeader : public header::BaseHeader { } // namespace o2::utilities -#endif //ALICEO2_DATASAMPLINGHEADER_H +#endif // ALICEO2_DATASAMPLINGHEADER_H diff --git a/Utilities/DataSampling/include/DataSampling/Dispatcher.h b/Utilities/DataSampling/include/DataSampling/Dispatcher.h index d92876f4c6125..1d34269f87536 100644 --- a/Utilities/DataSampling/include/DataSampling/Dispatcher.h +++ b/Utilities/DataSampling/include/DataSampling/Dispatcher.h @@ -64,7 +64,7 @@ class Dispatcher : public framework::Task framework::Options getOptions(); private: - DataSamplingHeader prepareDataSamplingHeader(const DataSamplingPolicy& policy); + DataSamplingHeader prepareDataSamplingHeader(const DataSamplingPolicy& policy, header::DataHeader const& original); header::Stack extractAdditionalHeaders(const char* inputHeaderStack) const; void reportStats(monitoring::Monitoring& monitoring) const; void send(framework::DataAllocator& dataAllocator, const framework::DataRef& inputData, const framework::Output& output) const; @@ -78,4 +78,4 @@ class Dispatcher : public framework::Task } // namespace o2::utilities -#endif //ALICEO2_DISPATCHER_H 
+#endif // ALICEO2_DISPATCHER_H diff --git a/Utilities/DataSampling/src/DataSamplingHeader.cxx b/Utilities/DataSampling/src/DataSamplingHeader.cxx index 392e37a5d9117..204d4aa2b56a6 100644 --- a/Utilities/DataSampling/src/DataSamplingHeader.cxx +++ b/Utilities/DataSampling/src/DataSamplingHeader.cxx @@ -19,16 +19,15 @@ namespace o2::utilities { -DataSamplingHeader::DataSamplingHeader() : BaseHeader(sizeof(DataSamplingHeader), sHeaderType, sSerializationMethod, sVersion) -{ -} - -DataSamplingHeader::DataSamplingHeader(uint64_t _sampleTimeUs, uint32_t _totalAcceptedMessages, uint32_t _totalEvaluatedMessages, DeviceIDType _deviceID) +DataSamplingHeader::DataSamplingHeader(uint64_t _sampleTimeUs, uint32_t _totalAcceptedMessages, uint32_t _totalEvaluatedMessages, DeviceIDType _deviceID, header::DataHeader const& _original) : BaseHeader(sizeof(DataSamplingHeader), sHeaderType, sSerializationMethod, sVersion), sampleTimeUs(_sampleTimeUs), totalAcceptedMessages(_totalAcceptedMessages), totalEvaluatedMessages(_totalEvaluatedMessages), - deviceID(_deviceID) + deviceID(_deviceID), + dataDescription(_original.dataDescription), + dataOrigin(_original.dataOrigin), + subSpecification(_original.subSpecification) { } @@ -42,4 +41,4 @@ const uint32_t o2::utilities::DataSamplingHeader::sVersion = 1; const o2::header::HeaderType o2::utilities::DataSamplingHeader::sHeaderType = header::String2("DataSamp"); const o2::header::SerializationMethod o2::utilities::DataSamplingHeader::sSerializationMethod = o2::header::gSerializationMethodNone; -} // namespace o2::utilities \ No newline at end of file +} // namespace o2::utilities diff --git a/Utilities/DataSampling/src/Dispatcher.cxx b/Utilities/DataSampling/src/Dispatcher.cxx index 28ff4d5568da9..38ad15f5fd752 100644 --- a/Utilities/DataSampling/src/Dispatcher.cxx +++ b/Utilities/DataSampling/src/Dispatcher.cxx @@ -99,7 +99,7 @@ void Dispatcher::run(ProcessingContext& ctx) // a "TST/RAWDATA/*" output. 
if (auto route = policy->match(inputMatcher); route != nullptr && policy->decide(firstPart)) { auto routeAsConcreteDataType = DataSpecUtils::asConcreteDataTypeMatcher(*route); - auto dsheader = prepareDataSamplingHeader(*policy); + auto dsheader = prepareDataSamplingHeader(*policy, *firstInputHeader); for (const auto& part : inputIt) { if (part.header != nullptr) { // We copy every header which is not DataHeader or DataProcessingHeader, @@ -144,7 +144,7 @@ void Dispatcher::reportStats(Monitoring& monitoring) const monitoring.send(Metric{dispatcherTotalAcceptedMessages, "Dispatcher_messages_passed", Verbosity::Prod}.addTag(tags::Key::Subsystem, tags::Value::DataSampling)); } -DataSamplingHeader Dispatcher::prepareDataSamplingHeader(const DataSamplingPolicy& policy) +DataSamplingHeader Dispatcher::prepareDataSamplingHeader(const DataSamplingPolicy& policy, header::DataHeader const& original) { uint64_t sampleTime = static_cast(std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count()); @@ -152,7 +152,8 @@ DataSamplingHeader Dispatcher::prepareDataSamplingHeader(const DataSamplingPolic sampleTime, policy.getTotalAcceptedMessages(), policy.getTotalEvaluatedMessages(), - mDeviceID}; + mDeviceID, + original}; } header::Stack Dispatcher::extractAdditionalHeaders(const char* inputHeaderStack) const diff --git a/Utilities/DataSampling/test/test_DataSamplingHeader.cxx b/Utilities/DataSampling/test/test_DataSamplingHeader.cxx index 48ab5ba953eec..377e9d855467d 100644 --- a/Utilities/DataSampling/test/test_DataSamplingHeader.cxx +++ b/Utilities/DataSampling/test/test_DataSamplingHeader.cxx @@ -21,57 +21,62 @@ using namespace o2::utilities; using namespace o2::header; -BOOST_AUTO_TEST_CASE(DataSamplingHeaderDefault) -{ - DataSamplingHeader header; - - BOOST_CHECK_EQUAL(header.sampleTimeUs, 0); - BOOST_CHECK_EQUAL(header.totalAcceptedMessages, 0); - BOOST_CHECK_EQUAL(header.totalEvaluatedMessages, 0); - BOOST_CHECK_EQUAL(strcmp(header.deviceID.str, 
""), 0); -} - BOOST_AUTO_TEST_CASE(DataSamplingHeaderInit) { - DataSamplingHeader header{123, 456, 789, "abc"}; + o2::header::DataHeader original("A", "TST", 1); + DataSamplingHeader header{123, 456, 789, "abc", original}; BOOST_CHECK_EQUAL(header.sampleTimeUs, 123); BOOST_CHECK_EQUAL(header.totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(header.totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(header.deviceID.str, "abc"), 0); + BOOST_CHECK_EQUAL(strcmp(header.dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(header.dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(header.subSpecification, 1); } BOOST_AUTO_TEST_CASE(DataSamplingHeaderCopy) { - DataSamplingHeader header{123, 456, 789, "abc"}; + o2::header::DataHeader original("A", "TST", 1); + DataSamplingHeader header{123, 456, 789, "abc", original}; DataSamplingHeader copy(header); BOOST_CHECK_EQUAL(copy.sampleTimeUs, 123); BOOST_CHECK_EQUAL(copy.totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(copy.totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(copy.deviceID.str, "abc"), 0); + BOOST_CHECK_EQUAL(strcmp(copy.dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(copy.dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(copy.subSpecification, 1); } BOOST_AUTO_TEST_CASE(DataSamplingHeaderAssignement) { - DataSamplingHeader first{123, 456, 789, "abc"}; - DataSamplingHeader second; - second = first; + o2::header::DataHeader original("A", "TST", 1); + DataSamplingHeader first{123, 456, 789, "abc", original}; + DataSamplingHeader second = first; BOOST_CHECK_EQUAL(first.sampleTimeUs, 123); BOOST_CHECK_EQUAL(first.totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(first.totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(first.deviceID.str, "abc"), 0); + BOOST_CHECK_EQUAL(strcmp(first.dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(first.dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(first.subSpecification, 1); BOOST_CHECK_EQUAL(second.sampleTimeUs, 123); 
BOOST_CHECK_EQUAL(second.totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(second.totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(second.deviceID.str, "abc"), 0); + BOOST_CHECK_EQUAL(strcmp(second.dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(second.dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(second.subSpecification, 1); } BOOST_AUTO_TEST_CASE(DataSamplingHeaderOnStack) { - DataSamplingHeader header{123, 456, 789, "abc"}; + o2::header::DataHeader original("A", "TST", 1); + DataSamplingHeader header{123, 456, 789, "abc", original}; Stack headerStack{header}; const auto* dsHeaderFromStack = get(headerStack.data()); @@ -81,4 +86,7 @@ BOOST_AUTO_TEST_CASE(DataSamplingHeaderOnStack) BOOST_CHECK_EQUAL(dsHeaderFromStack->totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(dsHeaderFromStack->totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(dsHeaderFromStack->deviceID.str, "abc"), 0); -} \ No newline at end of file + BOOST_CHECK_EQUAL(strcmp(dsHeaderFromStack->dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(dsHeaderFromStack->dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(dsHeaderFromStack->subSpecification, 1); +} From a4e08418b60179e006690b1c056f5d3e553b934a Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 22:40:19 +0200 Subject: [PATCH 0216/1764] DPL: exclude tfCounter == 0 messages for EOS (#14169) * DPL: exclude tfCounter == 0 messages for EOS --- Framework/Core/src/ExternalFairMQDeviceProxy.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx index e67e484f7faf5..cfc445725a92d 100644 --- a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx +++ b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx @@ -534,7 +534,7 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL timingInfo.runNumber = dh->runNumber; timingInfo.tfCounter = dh->tfCounter; LOG(debug) 
<< msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); - if (dh->runNumber == 0 || (dh->tfCounter == 0 && o2::header::get(header) == nullptr) || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { + if (dh->runNumber == 0 || (dh->tfCounter == 0 && dh->dataDescription.as() != "EOS") || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { LOG(error) << "INVALID runNumber / tfCounter: runNumber " << dh->runNumber << ", tfCounter " << dh->tfCounter << ", FMQ runNumber " << fmqRunNumber << " for msgidx " << msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); From 4655f501f3a603da52150084145b31c225f5b4e5 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 10 Apr 2025 14:31:11 +0200 Subject: [PATCH 0217/1764] TPC cluster/digits helper can walk over sampled data --- DataFormats/Detectors/TPC/CMakeLists.txt | 1 + .../Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DataFormats/Detectors/TPC/CMakeLists.txt b/DataFormats/Detectors/TPC/CMakeLists.txt index b8b93c308e85d..81b1d5efad59a 100644 --- a/DataFormats/Detectors/TPC/CMakeLists.txt +++ b/DataFormats/Detectors/TPC/CMakeLists.txt @@ -34,6 +34,7 @@ o2_add_library( O2::ReconstructionDataFormats O2::CommonDataFormat O2::Headers + O2::DataSampling O2::Algorithm) o2_target_root_dictionary( diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h index 30b40ed70b9c7..f4a318bc30101 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h +++ 
b/DataFormats/Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h @@ -21,6 +21,7 @@ #include "Framework/DataRefUtils.h" #include #include "Framework/InputRecordWalker.h" +#include "DataSampling/DataSamplingHeader.h" #include "DataFormatsTPC/TrackTPC.h" #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" @@ -80,7 +81,7 @@ static auto getWorkflowTPCInput(o2::framework::ProcessingContext& pc, int verbos if (do_digits) { std::fill(inputDigitsMCIndex.begin(), inputDigitsMCIndex.end(), -1); } - for (auto const& ref : o2::framework::InputRecordWalker(pc.inputs(), filter)) { + for (auto const& ref : o2::framework::InputRecordWalker(pc.inputs(), filter)) { auto const* sectorHeader = o2::framework::DataRefUtils::getHeader(ref); if (sectorHeader == nullptr) { // FIXME: think about error policy @@ -127,7 +128,7 @@ static auto getWorkflowTPCInput(o2::framework::ProcessingContext& pc, int verbos {"check", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, "CLUSTERNATIVE"}, o2::framework::Lifetime::Timeframe}, }; unsigned long recvMask = 0; - for (auto const& ref : o2::framework::InputRecordWalker(pc.inputs(), filter)) { + for (auto const& ref : o2::framework::InputRecordWalker(pc.inputs(), filter)) { auto const* sectorHeader = o2::framework::DataRefUtils::getHeader(ref); if (sectorHeader == nullptr) { throw std::runtime_error("sector header missing on header stack"); From 356d6990cd1fdc13e28dbbb71ad0a4777cd1dd8b Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 10 Apr 2025 21:54:25 +0200 Subject: [PATCH 0218/1764] Add in-place replaceAll method to StrUtils --- Common/Utils/include/CommonUtils/StringUtils.h | 3 +++ Common/Utils/src/StringUtils.cxx | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/Common/Utils/include/CommonUtils/StringUtils.h b/Common/Utils/include/CommonUtils/StringUtils.h index 7a2edbf3b2f53..c68e441d5b1c4 100644 --- a/Common/Utils/include/CommonUtils/StringUtils.h +++ 
b/Common/Utils/include/CommonUtils/StringUtils.h @@ -146,6 +146,9 @@ struct Str { return s.str(); } + // replace all occurencies of from by to, return count + static int replaceAll(std::string& s, const std::string& from, const std::string& to); + // generate random string of given length, suitable for file names static std::string getRandomString(int length); diff --git a/Common/Utils/src/StringUtils.cxx b/Common/Utils/src/StringUtils.cxx index 03bf68df5a41c..4c0dd30ae6211 100644 --- a/Common/Utils/src/StringUtils.cxx +++ b/Common/Utils/src/StringUtils.cxx @@ -34,6 +34,19 @@ std::vector Str::tokenize(const std::string& src, char delim, bool return tokens; } +// replace all occurencies of from by to, return count +int Str::replaceAll(std::string& s, const std::string& from, const std::string& to) +{ + int count = 0; + size_t pos = 0; + while ((pos = s.find(from, pos)) != std::string::npos) { + s.replace(pos, from.length(), to); + pos += to.length(); // Handles case where 'to' is a substring of 'from' + count++; + } + return count; +} + // generate random string of given lenght, suitable for file names std::string Str::getRandomString(int lenght) { From b75d6433d3513c547588a9f93d0c64d536607311 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 10 Apr 2025 17:08:30 +0200 Subject: [PATCH 0219/1764] o2-ecs-grp-create creates CTP/Config if --original-run passed at SOR Will clone the CTP/Config/Config of (replayed) original-run with the credentials of the new synthetic run --- Detectors/GRP/workflows/CMakeLists.txt | 1 + .../GRP/workflows/src/create-grp-ecs.cxx | 64 +++++++++++++++++-- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/Detectors/GRP/workflows/CMakeLists.txt b/Detectors/GRP/workflows/CMakeLists.txt index ea56cf8270335..1097855a5d579 100644 --- a/Detectors/GRP/workflows/CMakeLists.txt +++ b/Detectors/GRP/workflows/CMakeLists.txt @@ -45,6 +45,7 @@ o2_add_executable(grp-create SOURCES src/create-grp-ecs.cxx PUBLIC_LINK_LIBRARIES 
O2::DetectorsCommonDataFormats O2::DataFormatsParameters + O2::DataFormatsCTP O2::CommonUtils O2::CCDB Boost::program_options) diff --git a/Detectors/GRP/workflows/src/create-grp-ecs.cxx b/Detectors/GRP/workflows/src/create-grp-ecs.cxx index 95bfb878cee9d..873133e0dd46b 100644 --- a/Detectors/GRP/workflows/src/create-grp-ecs.cxx +++ b/Detectors/GRP/workflows/src/create-grp-ecs.cxx @@ -15,8 +15,10 @@ #include #include #include "DataFormatsParameters/GRPECSObject.h" +#include "DataFormatsCTP/Configuration.h" #include "DetectorsCommonDataFormats/DetID.h" #include "CCDB/CcdbApi.h" +#include "CCDB/BasicCCDBManager.h" #include "CommonUtils/NameConf.h" #include "CommonUtils/StringUtils.h" @@ -31,6 +33,7 @@ enum CCDBRefreshMode { NONE, int createGRPECSObject(const std::string& dataPeriod, int run, + int runOrig, // in case of replay int runTypeI, int nHBPerTF, const std::string& _detsReadout, @@ -44,13 +47,14 @@ int createGRPECSObject(const std::string& dataPeriod, long marginAtSOR, long marginAtEOR, const std::string& ccdbServer = "", + std::string ccdbServerInp = "", const std::string& metaDataStr = "", CCDBRefreshMode refresh = CCDBRefreshMode::NONE) { int retValGLO = 0; int retValRCT = 0; int retValGLOmd = 0; - + int retValCTP = 0; // substitute TRG by CTP std::regex regCTP(R"((^\s*|,\s*)(TRG)(\s*,|\s*$))"); std::string detsReadout{std::regex_replace(_detsReadout, regCTP, "$1CTP$3")}; @@ -78,6 +82,8 @@ int createGRPECSObject(const std::string& dataPeriod, tendVal = tend + marginAtEOR; } GRPECSObject grpecs; + o2::ctp::CTPConfiguration* ctpConfig = nullptr; + o2::ctp::CTPConfiguration ctpConfigNew; grpecs.setTimeStart(tstart); grpecs.setTimeEnd(tend); grpecs.setTimeStartCTP(tstartCTP); @@ -119,10 +125,32 @@ int createGRPECSObject(const std::string& dataPeriod, } }; + if (ccdbServerInp.empty()) { + ccdbServerInp = ccdbServer; + } + if (runOrig > 0 && runOrig != run && tend <= tstart && !ccdbServerInp.empty()) { // create CTP config + try { + auto& bcm = 
o2::ccdb::BasicCCDBManager::instance(); + bcm.setURL(ccdbServerInp); + bcm.setFatalWhenNull(false); + ctpConfig = bcm.getForRun("CTP/Config/Config", runOrig); + if (!ctpConfig) { + throw std::runtime_error(fmt::format("Failed to access CTP/Config/Config for original run {}", runOrig)); + } + std::string cfstr = ctpConfig->getConfigString(), srun{fmt::format("run {}", run)}, srunOrig{fmt::format("run {}", runOrig)}; + o2::utils::Str::replaceAll(cfstr, srunOrig, srun); + ctpConfigNew.loadConfigurationRun3(cfstr); + ctpConfigNew.setRunNumber(run); + } catch (std::exception e) { + LOGP(error, "Failed to create CTP/Config/Config from the original run {}, reason: {}", runOrig, e.what()); + } + } + toKeyValPairs(metaDataStr); if (!ccdbServer.empty()) { CcdbApi api; + const std::string objPath{"GLO/Config/GRPECS"}; api.init(ccdbServer); metadata["responsible"] = "ECS"; @@ -181,13 +209,33 @@ int createGRPECSObject(const std::string& dataPeriod, } } } + + if (ctpConfig && ctpConfigNew.getRunNumber() == run) { // create CTP config + std::map metadataCTP; + metadataCTP["runNumber"] = fmt::format("{}", run); + metadataCTP["comment"] = fmt::format("cloned from run {}", runOrig); + retValCTP = api.storeAsTFileAny(&ctpConfigNew, "CTP/Config/Config", metadataCTP, tstart, tendVal); + if (retValCTP == 0) { + LOGP(info, "Uploaded to {}/{} with validity {}:{} for SOR:{}/EOR:{}, cloned from run {}", ccdbServer, "CTP/Config/Config", tstart, tendVal, tstart, tend, runOrig); + } else { + LOGP(alarm, "Upload to {}/{} with validity {}:{} for SOR:{}/EOR:{} (cloned from run {}) FAILED, returned with code {}", ccdbServer, "CTP/Config/Config", tstart, tendVal, tstart, tend, runOrig, retValCTP); + } + } } else { // write a local file auto fname = o2::base::NameConf::getGRPECSFileName(); TFile grpF(fname.c_str(), "recreate"); grpF.WriteObjectAny(&grpecs, grpecs.Class(), o2::base::NameConf::CCDBOBJECT.data()); - LOG(info) << "Stored to local file " << fname; + grpF.Close(); + LOGP(info, "Stored 
GRPECS to local file {}", fname); + if (ctpConfig && ctpConfigNew.getRunNumber() == run) { + std::string ctnpfname = fmt::format("CTPConfig_{}_from_{}.root", run, runOrig); + TFile ctpF(ctnpfname.c_str(), "recreate"); + ctpF.WriteObjectAny(&ctpConfigNew, ctpConfigNew.Class(), o2::base::NameConf::CCDBOBJECT.data()); + ctpF.Close(); + LOGP(info, "Stored CTPConfig to local file {}", ctnpfname); + } } - // + if (refresh != CCDBRefreshMode::NONE && !ccdbServer.empty()) { auto cmd = fmt::format("curl -I -i -s \"{}{}latest/%5Cw%7B3%7D/.*/`date +%s000`/?prepare={}\"", ccdbServer, ccdbServer.back() == '/' ? "" : "/", refresh == CCDBRefreshMode::SYNC ? "sync" : "true"); auto t0 = std::chrono::high_resolution_clock::now(); @@ -195,7 +243,7 @@ int createGRPECSObject(const std::string& dataPeriod, auto t1 = std::chrono::high_resolution_clock::now(); LOGP(info, "Executed [{}] -> {} in {:.3f} s", cmd, res, std::chrono::duration_cast(t1 - t0).count() / 1000.f); } - if (retValGLO != 0 || retValRCT != 0 || retValGLOmd != 0) { + if (retValGLO != 0 || retValRCT != 0 || retValGLOmd != 0 || retValCTP != 0) { return 4; } return 0; @@ -229,10 +277,12 @@ int main(int argc, char** argv) add_option("start-time-ctp", bpo::value()->default_value(0), "run start CTP time in ms, same as ECS if not set or 0"); add_option("end-time-ctp", bpo::value()->default_value(0), "run end CTP time in ms, same as ECS if not set or 0"); add_option("ccdb-server", bpo::value()->default_value("http://alice-ccdb.cern.ch"), "CCDB server for upload, local file if empty"); + add_option("ccdb-server-input", bpo::value()->default_value(""), "CCDB server for inputs (if needed, e.g. 
CTPConfig), dy default ccdb-server is used"); add_option("meta-data,m", bpo::value()->default_value("")->implicit_value(""), "metadata as key1=value1;key2=value2;.."); add_option("refresh", bpo::value()->default_value("")->implicit_value("async"), R"(refresh server cache after upload: "none" (or ""), "async" (non-blocking) and "sync" (blocking))"); add_option("marginSOR", bpo::value()->default_value(4 * o2::ccdb::CcdbObjectInfo::DAY), "validity at SOR"); add_option("marginEOR", bpo::value()->default_value(10 * o2::ccdb::CcdbObjectInfo::MINUTE), "validity margin to add after EOR"); + add_option("original-run,o", bpo::value()->default_value(0), "if >0, use as the source run to create CTP/Config/Config object"); opt_all.add(opt_general).add(opt_hidden); bpo::store(bpo::command_line_parser(argc, argv).options(opt_all).positional(opt_pos).run(), vm); @@ -253,13 +303,13 @@ int main(int argc, char** argv) } if (vm.count("run") == 0) { std::cerr << "ERROR: " - << "obligator run number is missing" << std::endl; + << "obligatory run number is missing" << std::endl; std::cerr << opt_general << std::endl; exit(3); } if (vm.count("period") == 0) { std::cerr << "ERROR: " - << "obligator data taking period name is missing" << std::endl; + << "obligatory data taking period name is missing" << std::endl; std::cerr << opt_general << std::endl; exit(3); } @@ -278,6 +328,7 @@ int main(int argc, char** argv) int retVal = createGRPECSObject( vm["period"].as(), vm["run"].as(), + vm["original-run"].as(), vm["run-type"].as(), vm["hbf-per-tf"].as(), vm["detectors"].as(), @@ -291,6 +342,7 @@ int main(int argc, char** argv) vm["marginSOR"].as(), vm["marginEOR"].as(), vm["ccdb-server"].as(), + vm["ccdb-server-input"].as(), vm["meta-data"].as(), refresh); From 9e322a95f4f1b2e81c54d71a360a3ef8ca159f0d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 11 Apr 2025 13:38:56 +0200 Subject: [PATCH 0220/1764] DPL Analysis: improve handling of tables with sources (#14172) --- 
.../Core/include/Framework/AnalysisHelpers.h | 57 +++++++++++++++---- .../Core/include/Framework/AnalysisTask.h | 51 +---------------- Framework/Core/src/WorkflowHelpers.cxx | 18 +++--- 3 files changed, 57 insertions(+), 69 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 985f80cd548bc..55d2490dff1bc 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -13,7 +13,6 @@ #include "Framework/ASoA.h" #include "Framework/DataAllocator.h" -#include "Framework/ExpressionHelpers.h" #include "Framework/IndexBuilderHelpers.h" #include "Framework/InputSpec.h" #include "Framework/Output.h" @@ -28,6 +27,48 @@ #include namespace o2::soa { +template +constexpr auto tableRef2ConfigParamSpec() +{ + return o2::framework::ConfigParamSpec{ + std::string{"input:"} + o2::aod::label(), + framework::VariantType::String, + aod::sourceSpec(), + {"\"\""}}; +} + +namespace +{ +template +inline constexpr auto getSources() +{ + return [] refs>() { + return [](std::index_sequence) { + return std::vector{soa::tableRef2ConfigParamSpec()...}; + }(std::make_index_sequence()); + }.template operator()(); +} + +template +constexpr auto getInputMetadata() -> std::vector +{ + std::vector inputMetadata; + auto inputSources = getSources(); + std::sort(inputSources.begin(), inputSources.end(), [](framework::ConfigParamSpec const& a, framework::ConfigParamSpec const& b) { return a.name < b.name; }); + auto last = std::unique(inputSources.begin(), inputSources.end(), [](framework::ConfigParamSpec const& a, framework::ConfigParamSpec const& b) { return a.name == b.name; }); + inputSources.erase(last, inputSources.end()); + inputMetadata.insert(inputMetadata.end(), inputSources.begin(), inputSources.end()); + return inputMetadata; +} + +template + requires(!soa::with_sources) +constexpr auto getInputMetadata() -> std::vector +{ + return {}; +} +} // 
namespace + template constexpr auto tableRef2InputSpec() { @@ -35,7 +76,9 @@ constexpr auto tableRef2InputSpec() o2::aod::label(), o2::aod::origin(), o2::aod::description(o2::aod::signature()), - R.version}; + R.version, + framework::Lifetime::Timeframe, + getInputMetadata>::metadata>()}; } template @@ -64,16 +107,6 @@ constexpr auto tableRef2OutputRef() o2::aod::label(), R.version}; } - -template -constexpr auto tableRef2ConfigParamSpec() -{ - return o2::framework::ConfigParamSpec{ - std::string{"input:"} + o2::aod::label(), - framework::VariantType::String, - aod::sourceSpec(), - {"\"\""}}; -} } // namespace o2::soa namespace o2::framework diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index bd1a1cfd88954..c7f3da1948c62 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -65,46 +65,6 @@ concept is_enumeration = is_enumeration_v>; // the contents of an AnalysisTask... 
namespace { struct AnalysisDataProcessorBuilder { - template - static ConfigParamSpec getSpec() - { - if constexpr (soa::has_metadata>) { - return ConfigParamSpec{std::string{"input:"} + aod::MetadataTrait::metadata::tableLabel(), VariantType::String, aod::MetadataTrait::metadata::sourceSpec(), {"\"\""}}; - } else { - using O1 = framework::pack_element_t<0, typename T::originals>; - return ConfigParamSpec{std::string{"input:"} + aod::MetadataTrait::metadata::tableLabel(), VariantType::String, aod::MetadataTrait::metadata::sourceSpec(), {"\"\""}}; - } - } - - template - static ConfigParamSpec getSpec() - { - return soa::tableRef2ConfigParamSpec(); - } - - template - static inline auto getSources() - { - return [] refs>() { - return [](std::index_sequence) { - return std::vector{soa::tableRef2ConfigParamSpec()...}; - }(std::make_index_sequence()); - }.template operator()(); - } - - template - - static auto getInputMetadata() - { - std::vector inputMetadata; - auto inputSources = getSources(); - std::sort(inputSources.begin(), inputSources.end(), [](ConfigParamSpec const& a, ConfigParamSpec const& b) { return a.name < b.name; }); - auto last = std::unique(inputSources.begin(), inputSources.end(), [](ConfigParamSpec const& a, ConfigParamSpec const& b) { return a.name == b.name; }); - inputSources.erase(last, inputSources.end()); - inputMetadata.insert(inputMetadata.end(), inputSources.begin(), inputSources.end()); - return inputMetadata; - } - template static void addGroupingCandidates(std::vector& bk, std::vector& bku) { @@ -130,14 +90,9 @@ struct AnalysisDataProcessorBuilder { template static void addOriginalRef(const char* name, bool value, std::vector& inputs) { - using metadata = typename aod::MetadataTrait>::metadata; - std::vector inputMetadata; - inputMetadata.emplace_back(ConfigParamSpec{std::string{"control:"} + name, VariantType::Bool, value, {"\"\""}}); - if constexpr (soa::with_sources) { - auto inputSources = getInputMetadata(); - 
inputMetadata.insert(inputMetadata.end(), inputSources.begin(), inputSources.end()); - } - DataSpecUtils::updateInputList(inputs, InputSpec{o2::aod::label(), o2::aod::origin(), aod::description(o2::aod::signature()), R.version, Lifetime::Timeframe, inputMetadata}); + auto spec = soa::tableRef2InputSpec(); + spec.metadata.emplace_back(ConfigParamSpec{std::string{"control:"} + name, VariantType::Bool, value, {"\"\""}}); + DataSpecUtils::updateInputList(inputs, std::move(spec)); } /// helpers to append expression information for a single argument diff --git a/Framework/Core/src/WorkflowHelpers.cxx b/Framework/Core/src/WorkflowHelpers.cxx index b18b559fe99fb..652e863f98394 100644 --- a/Framework/Core/src/WorkflowHelpers.cxx +++ b/Framework/Core/src/WorkflowHelpers.cxx @@ -385,6 +385,15 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext auto outputSpecLessThan = [](OutputSpec const& lhs, OutputSpec const& rhs) { return DataSpecUtils::describe(lhs) < DataSpecUtils::describe(rhs); }; std::sort(ac.requestedDYNs.begin(), ac.requestedDYNs.end(), inputSpecLessThan); std::sort(ac.providedDYNs.begin(), ac.providedDYNs.end(), outputSpecLessThan); + + DataProcessorSpec indexBuilder{ + "internal-dpl-aod-index-builder", + {}, + {}, + readers::AODReaderHelpers::indexBuilderCallback(ac.requestedIDXs), + {}}; + AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, indexBuilder); + for (auto& input : ac.requestedDYNs) { if (std::none_of(ac.providedDYNs.begin(), ac.providedDYNs.end(), [&input](auto const& x) { return DataSpecUtils::match(input, x); })) { ac.spawnerInputs.emplace_back(input); @@ -397,15 +406,6 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext {}, readers::AODReaderHelpers::aodSpawnerCallback(ac.spawnerInputs), {}}; - - DataProcessorSpec indexBuilder{ - "internal-dpl-aod-index-builder", - {}, - {}, - 
readers::AODReaderHelpers::indexBuilderCallback(ac.requestedIDXs), - {}}; - - AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, indexBuilder); AnalysisSupportHelpers::addMissingOutputsToSpawner({}, ac.spawnerInputs, ac.requestedAODs, aodSpawner); AnalysisSupportHelpers::addMissingOutputsToReader(ac.providedAODs, ac.requestedAODs, aodReader); From 206d9ab0ef4c905694237d9cb36a5d966e284aaf Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 12 Apr 2025 08:13:44 +0200 Subject: [PATCH 0221/1764] ITS3: fix tracking initialisation (#14154) --- .../ITS3/reconstruction/src/TrackingInterface.cxx | 11 +++++++++++ Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx | 1 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/TrackingInterface.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/TrackingInterface.cxx index 10c6b9265a8bb..afb276e956e76 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/TrackingInterface.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/TrackingInterface.cxx @@ -14,6 +14,7 @@ #include "ITSBase/GeometryTGeo.h" #include "ITSMFTBase/DPLAlpideParam.h" #include "DetectorsBase/GRPGeomHelper.h" +#include "Framework/DeviceSpec.h" namespace o2::its3 { @@ -31,7 +32,17 @@ void ITS3TrackingInterface::updateTimeDependentParams(framework::ProcessingConte } auto geom = its::GeometryTGeo::Instance(); geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::T2GRot, o2::math_utils::TransformType::T2G)); + initialise(); getConfiguration(pc); + if (pc.services().get().inputTimesliceId == 0) { // print settings only for the 1st pipeline + o2::its::VertexerParamConfig::Instance().printKeyValues(); + o2::its::TrackerParamConfig::Instance().printKeyValues(); + const auto& trParams = getTracker()->getParameters(); + for (size_t it = 0; it < trParams.size(); it++) { + const auto& par = trParams[it]; + 
LOGP(info, "recoIter#{} : {}", it, par.asString()); + } + } } } diff --git a/Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx index 90f94e625d6ea..5b710a3d11fef 100644 --- a/Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx +++ b/Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx @@ -67,7 +67,6 @@ void TrackerDPL::init(InitContext& ic) mITS3TrackingInterface.setTraitsFromProvider(mChainITS->GetITSVertexerTraits(), mChainITS->GetITSTrackerTraits(), mChainITS->GetITSTimeframe()); - mITS3TrackingInterface.initialise(); } void TrackerDPL::stop() From 4be2de6e8316d05db24c5439b56ec6f67ce89e88 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 12 Apr 2025 08:14:35 +0200 Subject: [PATCH 0222/1764] ITS3: add metal layer + fix macros (#13894) --- Detectors/Upgrades/ITS3/CMakeLists.txt | 6 +- Detectors/Upgrades/ITS3/README.md | 105 +++++++- .../ITS3/alignment/src/MisalignmentHits.cxx | 1 - Detectors/Upgrades/ITS3/base/CMakeLists.txt | 12 +- .../ITS3/base/include/ITS3Base/ITS3Params.h | 3 +- ...tionSuperAlpide.h => SegmentationMosaix.h} | 133 ++++----- .../ITS3/base/include/ITS3Base/SpecsV2.h | 150 +++++++---- .../Upgrades/ITS3/base/src/ITS3BaseLinkDef.h | 1 - .../ITS3/base/src/SegmentationSuperAlpide.cxx | 20 -- Detectors/Upgrades/ITS3/data/CMakeLists.txt | 25 ++ .../Upgrades/ITS3/macros/test/CMakeLists.txt | 4 +- .../ITS3/macros/test/CheckClusterSize.C | 24 +- .../ITS3/macros/test/CheckClustersITS3.C | 66 +++-- .../ITS3/macros/test/CheckDigitsDensity.C | 16 +- .../ITS3/macros/test/CheckDigitsITS3.C | 21 +- .../Upgrades/ITS3/macros/test/CheckHits.C | 2 - ...erAlpideSegment.C => CheckMosaixSegment.C} | 74 +++-- ...gmentTrans.C => CheckMosaixSegmentTrans.C} | 42 ++- .../ITS3/macros/test/CheckTileNumbering.C | 6 +- .../macros/test/CompareClustersAndDigits.C | 39 +-- .../ITS3/macros/test/CreateDictionariesITS3.C | 115 +++++--- .../ITS3/macros/test/TestSensorGeometry.C | 19 +- .../BuildTopologyDictionary.h | 48 
++-- .../include/ITS3Reconstruction/Clusterer.h | 10 +- .../include/ITS3Reconstruction/IOUtils.h | 23 +- .../include/ITS3Reconstruction/LookUp.h | 20 +- .../ITS3Reconstruction/TopologyDictionary.h | 124 +++++---- .../src/BuildTopologyDictionary.cxx | 254 +++++++++++------- .../ITS3/reconstruction/src/Clusterer.cxx | 21 +- .../ITS3/reconstruction/src/IOUtils.cxx | 3 - .../src/ITS3ReconstructionLinkDef.h | 1 + .../ITS3/reconstruction/src/LookUp.cxx | 21 +- .../reconstruction/src/TopologyDictionary.cxx | 176 ++++++++---- .../Upgrades/ITS3/simulation/CMakeLists.txt | 4 +- .../DescriptorInnerBarrelITS3.h | 6 +- .../include/ITS3Simulation/DigiParams.h | 45 ++++ .../include/ITS3Simulation/Digitizer.h | 31 ++- .../include/ITS3Simulation/ITS3Layer.h | 25 +- .../src/DescriptorInnerBarrelITS3.cxx | 4 +- .../ITS3/simulation/src/DigiParams.cxx | 40 +++ .../ITS3/simulation/src/Digitizer.cxx | 111 +++++--- .../ITS3/simulation/src/ITS3Layer.cxx | 54 ++-- .../simulation/src/ITS3SimulationLinkDef.h | 1 + 43 files changed, 1197 insertions(+), 709 deletions(-) rename Detectors/Upgrades/ITS3/base/include/ITS3Base/{SegmentationSuperAlpide.h => SegmentationMosaix.h} (54%) delete mode 100644 Detectors/Upgrades/ITS3/base/src/SegmentationSuperAlpide.cxx create mode 100644 Detectors/Upgrades/ITS3/data/CMakeLists.txt rename Detectors/Upgrades/ITS3/macros/test/{CheckSuperAlpideSegment.C => CheckMosaixSegment.C} (78%) rename Detectors/Upgrades/ITS3/macros/test/{CheckSuperAlpideSegmentTrans.C => CheckMosaixSegmentTrans.C} (85%) create mode 100644 Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h create mode 100644 Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx diff --git a/Detectors/Upgrades/ITS3/CMakeLists.txt b/Detectors/Upgrades/ITS3/CMakeLists.txt index 6965061571da6..73ad4b9d53e37 100644 --- a/Detectors/Upgrades/ITS3/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/CMakeLists.txt @@ -9,11 +9,13 @@ # granted to it by virtue of its status as an Intergovernmental 
Organization # or submit itself to any jurisdiction. -#add_compile_options(-O0 -g -fPIC) +#add_compile_options(-O0 -g -fPIC -fsanitize=address) +#add_link_options(-fsanitize=address) -add_subdirectory(macros) +add_subdirectory(data) add_subdirectory(simulation) add_subdirectory(alignment) add_subdirectory(base) add_subdirectory(workflow) add_subdirectory(reconstruction) +add_subdirectory(macros) diff --git a/Detectors/Upgrades/ITS3/README.md b/Detectors/Upgrades/ITS3/README.md index 6d3b0d8d821fb..afcea6a5c3e17 100644 --- a/Detectors/Upgrades/ITS3/README.md +++ b/Detectors/Upgrades/ITS3/README.md @@ -35,7 +35,7 @@ export ALICEO2_CCDB_LOCALCACHE=${PWD}/ccdb Simulate diamond -``` bash +```bash # append to o2-sim --configKeyValues="Diamond.width[2]=6.;"" ``` @@ -86,13 +86,27 @@ TODO ```bash # Create Full Geometry -o2-sim -g pythia8pp -j10 --detectorList ALICE2.1 --run 303901 -n0 +o2-sim --detectorList ALICE2.1 --run 303901 -n0 cp o2sim_geometry.root ${ALICEO2_CCDB_LOCALCACHE}/GLO/Config/Geometry/snapshot.root o2-create-aligned-geometry-workflow -b --configKeyValues "HBFUtils.startTime=1547978230000" --condition-remap="file://${ALICEO2_CCDB_LOCALCACHE}=GLO/Config/Geometry" cp o2sim_geometry-aligned.root ${ALICEO2_CCDB_LOCALCACHE}/GLO/Config/GeometryAligned/snapshot.root cp its_GeometryTGeo.root ${ALICEO2_CCDB_LOCALCACHE}/ITS/Config/Geometry/snapshot.root ``` +or copying the ideal geometry to the aligned one and: + +```cpp +{ + o2::base::GeometryManager::loadGeometry(""); + auto itsTGeo = o2::its::GeometryTGeo::Instance(); + itsTGeo->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::L2G, o2::math_utils::TransformType::T2GRot)); + TFile outF("its_GeometryTGeo.root", "recreate"); + outF.WriteObjectAny(itsTGeo, "o2::its::GeometryTGeo", "ccdb_object"); + outF.Close(); + itsTGeo->destroy(); +} +``` + ### Regenerating the TopologyDictionary 1. 
Clusterization w/o tracking @@ -158,7 +172,7 @@ The file `hijing.C` can be found [here](https://alice.its.cern.ch/jira/browse/AO 2. (optional) Run the macro `CreateITS3StaticDeadMap.C` and/or visualize with `CheckTileNumbering.C` 3. Move the ccdb object into `${ALICEO2_CCDB_LOCALCACHE}/IT3/Calib/DeadMap`, this is not optional since there is no default object uploaded 4. Run digitizer with `ITS3Params.useDeadChannelMap=true;`, e.g.: -``` bash +```bash o2-sim-digitizer-workflow --configKeyValues="ITS3Params.useDeadChannelMap=true;" ``` @@ -168,6 +182,89 @@ o2-sim-digitizer-workflow --configKeyValues="ITS3Params.useDeadChannelMap=true;" 1. Create misalignment parameters with `CreateMisalignmentITS3.C` 2. Visualize with `ShowCoefficients.C` 3. Run digitizer -``` bash +```bash o2-sim-digitizer-workflow -b --configKeyValues="ITS3Params.applyMisalignmentHits=true;ITS3Params.misalignmentHitsParams=misparams.root" ``` + + +### Misc +#### Setup to run SIM+DIGIT+TRACKING +```bash + +#!/bin/bash + +export IGNORE_VALIDITYCHECK_OF_CCDB_LOCALCACHE=1 +export ALICEO2_CCDB_LOCALCACHE=$PWD/ccdb + +BASE_DIR="batch_" +TOTAL_DIRS=4 +SIM_CMD="o2-sim -g pythia8pp --detectorList ALICE2.1 -m IT3 --run 303901 -n2000 --field ccdb -j8" +DIGIT_CMD="o2-sim-digitizer-workflow -b --interactionRate 675000 --run --configKeyValues=\"HBFUtils.runNumber=303901;HBFUtils.nHBFPerTF=32;ITSAlpideParam.roFrameLengthInBC=198\"" +RECO_CMD="o2-its3-reco-workflow -b --run --configKeyValues=\"ITSVertexerParam.phiCut=0.5;ITSVertexerParam.clusterContributorsCut=3;ITSVertexerParam.tanLambdaCut=0.2;ITSCATrackerParam.useTrackFollower=0;ITSCATrackerParam.findShortTracks=1;HBFUtils.runNumber=303901;HBFUtils.nHBFPerTF=32;ITSAlpideParam.roFrameLengthInBC=198\" --tracking-mode async" + +for ((i = 1; i <= TOTAL_DIRS; i++)); do + DIR="${BASE_DIR}${i}" + + if [ ! -d "$DIR" ]; then + mkdir "$DIR" + fi + + if [ -f "${DIR}/sim_done" ]; then + echo "Skipping SIM ${DIR} because _done exists." 
+ continue + fi + + cd "$DIR" + + echo "Executing SIM command in ${DIR}..." + eval $SIM_CMD >sim.log + + touch sim_done + + cd .. +done + +for ((i = 1; i <= TOTAL_DIRS; i++)); do + DIR="${BASE_DIR}${i}" + + if [ ! -d "$DIR" ]; then + mkdir "$DIR" + fi + + if [ -f "${DIR}/digit_done" ]; then + echo "Skipping DIGIT ${DIR} because _done exists." + continue + fi + + cd "$DIR" + + echo "Executing DIGIT command in ${DIR}..." + eval $DIGIT_CMD >digit.log + + touch digit_done + + cd .. +done + +for ((i = 1; i <= TOTAL_DIRS; i++)); do + DIR="${BASE_DIR}${i}" + + if [ ! -d "$DIR" ]; then + mkdir "$DIR" + fi + + if [ -f "${DIR}/reco_done" ]; then + echo "Skipping RECO ${DIR} because _done exists." + continue + fi + + cd "$DIR" + + echo "Executing RECO command in ${DIR}..." + eval $RECO_CMD >reco.log + + touch reco_done + + cd .. +done +``` diff --git a/Detectors/Upgrades/ITS3/alignment/src/MisalignmentHits.cxx b/Detectors/Upgrades/ITS3/alignment/src/MisalignmentHits.cxx index fbc0b5d623dca..66ab4c8090b54 100644 --- a/Detectors/Upgrades/ITS3/alignment/src/MisalignmentHits.cxx +++ b/Detectors/Upgrades/ITS3/alignment/src/MisalignmentHits.cxx @@ -10,7 +10,6 @@ // or submit itself to any jurisdiction. #include "ITS3Align/MisalignmentHits.h" -#include "ITS3Base/SegmentationSuperAlpide.h" #include "ITS3Base/ITS3Params.h" #include "SimConfig/DigiParams.h" #include "DetectorsBase/Propagator.h" diff --git a/Detectors/Upgrades/ITS3/base/CMakeLists.txt b/Detectors/Upgrades/ITS3/base/CMakeLists.txt index 8695e2323bbab..306226e5088cf 100644 --- a/Detectors/Upgrades/ITS3/base/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/base/CMakeLists.txt @@ -9,11 +9,9 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. 
-o2_add_library(ITS3Base - SOURCES src/SegmentationSuperAlpide.cxx - src/ITS3Params.cxx - PUBLIC_LINK_LIBRARIES O2::CommonConstants O2::MathUtils O2::DetectorsBase) +o2_add_library( + ITS3Base + SOURCES src/ITS3Params.cxx + PUBLIC_LINK_LIBRARIES O2::CommonConstants O2::MathUtils O2::DetectorsBase) -o2_target_root_dictionary(ITS3Base - HEADERS include/ITS3Base/SegmentationSuperAlpide.h - include/ITS3Base/ITS3Params.h) +o2_target_root_dictionary(ITS3Base HEADERS include/ITS3Base/ITS3Params.h) diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/ITS3Params.h b/Detectors/Upgrades/ITS3/base/include/ITS3Base/ITS3Params.h index c685bf0f085d6..0bd548cef953d 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/ITS3Params.h +++ b/Detectors/Upgrades/ITS3/base/include/ITS3Base/ITS3Params.h @@ -25,7 +25,8 @@ struct ITS3Params : public o2::conf::ConfigurableParamHelper { bool misalignmentHitsUseProp{false}; // Use propagtor for mis-alignment std::string globalGeoMisAlignerMacro{"${O2_ROOT}/share/macro/MisAlignGeoITS3.C"}; // Path to macro for global geometry mis-alignment // Chip studies - bool useDeadChannelMap{false}; // Query for a dead channel map to study disabling individual tiles + bool useDeadChannelMap{false}; // Query for a dead channel map to study disabling individual tiles + std::string chipResponseFunction{"APTS"}; // Chip response function one of "Alpide", "APTS" or "Mosaix" (not yet available) O2ParamDef(ITS3Params, "ITS3Params"); }; diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationSuperAlpide.h b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h similarity index 54% rename from Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationSuperAlpide.h rename to Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h index dbdf90574ce5d..f8d4a784120a0 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationSuperAlpide.h +++ 
b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h @@ -9,30 +9,39 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file SegmentationSuperAlpide.h -/// \brief Definition of the SegmentationSuperAlpide class +/// \file SegmentationMosaix.h +/// \brief Definition of the SegmentationMosaix class /// \author felix.schlepper@cern.ch -#ifndef ALICEO2_ITS3_SEGMENTATIONSUPERALPIDE_H_ -#define ALICEO2_ITS3_SEGMENTATIONSUPERALPIDE_H_ +#ifndef ALICEO2_ITS3_SEGMENTATIONMOSAIX_H_ +#define ALICEO2_ITS3_SEGMENTATIONMOSAIX_H_ + +#include #include "MathUtils/Cartesian.h" #include "ITS3Base/SpecsV2.h" -#include "Rtypes.h" - -#include namespace o2::its3 { /// Segmentation and response for pixels in ITS3 upgrade -class SegmentationSuperAlpide +class SegmentationMosaix { // This class defines the segmenation of the pixelArray in the tile. We define // two coordinate systems, one width x,z detector local coordianates (cm) and // the more natural row,col layout: Also all the transformation between these // two. The class provides the transformation from the tile to TGeo // coordinates. + // In fact there exist three coordinate systems and one is transient. + // 1. The curved coordinate system. The chip's local coordinate system is + // defined with its center at the mid-point of the tube. + // 2. The flat coordinate system. This is the tube segment projected onto a flat + // surface. In the projection we implicitly assume that the inner and outer + // stretch does not depend on the radius. + // Additionally, there is a difference between the flat geometrical center + // and the physical center defined by the metal layer. + // 3. The detector coordinate system. Defined by the row and column segmentation + // defined at the upper edge in the flat coord. 
// row,col=0 // | @@ -53,25 +62,32 @@ class SegmentationSuperAlpide // | | | // x----------------------x public: - virtual ~SegmentationSuperAlpide() = default; - SegmentationSuperAlpide(const SegmentationSuperAlpide&) = default; - SegmentationSuperAlpide(SegmentationSuperAlpide&&) = delete; - SegmentationSuperAlpide& operator=(const SegmentationSuperAlpide&) = delete; - SegmentationSuperAlpide& operator=(SegmentationSuperAlpide&&) = delete; - constexpr SegmentationSuperAlpide(int layer) : mLayer{layer} {} - - static constexpr int mNCols{constants::pixelarray::nCols}; - static constexpr int mNRows{constants::pixelarray::nRows}; - static constexpr int nPixels{mNCols * mNRows}; - static constexpr float mLength{constants::pixelarray::length}; - static constexpr float mWidth{constants::pixelarray::width}; - static constexpr float mPitchCol{constants::pixelarray::length / static_cast(mNCols)}; - static constexpr float mPitchRow{constants::pixelarray::width / static_cast(mNRows)}; - static constexpr float mSensorLayerThickness{constants::thickness}; - static constexpr float mSensorLayerThicknessEff{constants::effThickness}; - static constexpr std::array mRadii{constants::radii}; - - /// Transformation from the curved surface to a flat surface + constexpr SegmentationMosaix(int layer) : mRadius(static_cast(constants::radiiMiddle[layer])) {} + constexpr ~SegmentationMosaix() = default; + constexpr SegmentationMosaix(const SegmentationMosaix&) = default; + constexpr SegmentationMosaix(SegmentationMosaix&&) = delete; + constexpr SegmentationMosaix& operator=(const SegmentationMosaix&) = default; + constexpr SegmentationMosaix& operator=(SegmentationMosaix&&) = delete; + + static constexpr int NCols{constants::pixelarray::nCols}; + static constexpr int NRows{constants::pixelarray::nRows}; + static constexpr int NPixels{NCols * NRows}; + static constexpr float Length{constants::pixelarray::length}; + static constexpr float LengthH{Length / 2.f}; + static constexpr float 
Width{constants::pixelarray::width}; + static constexpr float WidthH{Width / 2.f}; + static constexpr float PitchCol{constants::pixelarray::pixels::mosaix::pitchZ}; + static constexpr float PitchRow{constants::pixelarray::pixels::mosaix::pitchX}; + static constexpr float SensorLayerThickness{constants::totalThickness}; + static constexpr float NominalYShift{constants::nominalYShift}; + + /// Transformation from the curved surface to a flat surface. + /// Additionally a shift in the flat coordinates must be applied because + /// the center of the TGeoShape when projected will be higher than the + /// physical thickness of the chip (we add an additional hull to account for + /// the copper metal interconnection which is in reality part of the chip but in our + /// simulation the silicon and metal layer are separated). Thus we shift the projected center + /// down by this difference to align the coordinate systems. /// \param xCurved Detector local curved coordinate x in cm with respect to /// the center of the sensitive volume. /// \param yCurved Detector local curved coordinate y in cm with respect to @@ -80,18 +96,20 @@ class SegmentationSuperAlpide /// the center of the sensitive volume. /// \param yFlat Detector local flat coordinate y in cm with respect to /// the center of the sensitive volume. 
- void curvedToFlat(const float xCurved, const float yCurved, float& xFlat, float& yFlat) const noexcept + constexpr void curvedToFlat(const float xCurved, const float yCurved, float& xFlat, float& yFlat) const noexcept { - // MUST align the flat surface with the curved surface with the original pixel array is on + // MUST align the flat surface with the curved surface with the original pixel array is on and account for metal + // stack float dist = std::hypot(xCurved, yCurved); - float phiReadout = constants::tile::readout::width / constants::radii[mLayer]; float phi = std::atan2(yCurved, xCurved); - xFlat = mRadii[mLayer] * (phi - phiReadout) - constants::pixelarray::width / 2.; - yFlat = dist - mRadii[mLayer]; + xFlat = (mRadius * phi) - WidthH; + // the y position is in the silicon volume however we need the chip volume (silicon+metalstack) + // this is accounted by a y shift + yFlat = dist - mRadius + NominalYShift; } /// Transformation from the flat surface to a curved surface - /// It works only if the detector is not rototraslated + /// It works only if the detector is not rototraslated. /// \param xFlat Detector local flat coordinate x in cm with respect to /// the center of the sensitive volume. /// \param yFlat Detector local flat coordinate y in cm with respect to @@ -100,13 +118,15 @@ class SegmentationSuperAlpide /// the center of the sensitive volume. /// \param yCurved Detector local curved coordinate y in cm with respect to /// the center of the sensitive volume. - void flatToCurved(float xFlat, float yFlat, float& xCurved, float& yCurved) const noexcept + constexpr void flatToCurved(float xFlat, float yFlat, float& xCurved, float& yCurved) const noexcept { - // MUST align the flat surface with the curved surface with the original pixel array is on - float dist = yFlat + mRadii[mLayer]; - float phiReadout = constants::tile::readout::width / mRadii[mLayer]; - xCurved = dist * std::cos(phiReadout + (xFlat + constants::pixelarray::width / 2.) 
/ mRadii[mLayer]); - yCurved = dist * std::sin(phiReadout + (xFlat + constants::pixelarray::width / 2.) / mRadii[mLayer]); + // MUST align the flat surface with the curved surface with the original pixel array is on and account for metal + // stack + // the y position is in the chip volume however we need the silicon volume + // this is accounted by a -y shift + float dist = yFlat - NominalYShift + mRadius; + xCurved = dist * std::cos((xFlat + WidthH) / mRadius); + yCurved = dist * std::sin((xFlat + WidthH) / mRadius); } /// Transformation from Geant detector centered local coordinates (cm) to @@ -120,7 +140,7 @@ class SegmentationSuperAlpide /// the center of the sensitive volume. /// \param int iRow Detector x cell coordinate. /// \param int iCol Detector z cell coordinate. - bool localToDetector(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept + constexpr bool localToDetector(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept { localToDetectorUnchecked(xRow, zCol, iRow, iCol); if (!isValid(iRow, iCol)) { @@ -131,11 +151,10 @@ class SegmentationSuperAlpide } // Same as localToDetector w.o. checks. - void localToDetectorUnchecked(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept + constexpr void localToDetectorUnchecked(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept { - namespace cp = constants::pixelarray; - iRow = std::floor((cp::width / 2. - xRow) / mPitchRow); - iCol = std::floor((zCol + cp::length / 2.) / mPitchCol); + iRow = static_cast(std::floor((WidthH - xRow) / PitchRow)); + iCol = static_cast(std::floor((zCol + LengthH) / PitchCol)); } /// Transformation from Detector cell coordinates to Geant detector centered @@ -148,7 +167,7 @@ class SegmentationSuperAlpide /// center of the sensitive volume. /// If iRow and or iCol is outside of the segmentation range a value of -0.5*Dx() /// or -0.5*Dz() is returned. 
- bool detectorToLocal(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept + constexpr bool detectorToLocal(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept { if (!isValid(iRow, iCol)) { return false; @@ -159,11 +178,10 @@ class SegmentationSuperAlpide // Same as detectorToLocal w.o. checks. // We position ourself in the middle of the pixel. - void detectorToLocalUnchecked(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept + constexpr void detectorToLocalUnchecked(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept { - namespace cp = constants::pixelarray; - xRow = -(iRow + 0.5) * mPitchRow + cp::width / 2.; - zCol = (iCol + 0.5) * mPitchCol - cp::length / 2.; + xRow = -(static_cast(iRow) + 0.5f) * PitchRow + WidthH; + zCol = (static_cast(iCol) + 0.5f) * PitchCol - LengthH; } bool detectorToLocal(int const row, int const col, math_utils::Point3D& loc) const noexcept @@ -172,7 +190,7 @@ class SegmentationSuperAlpide if (!detectorToLocal(row, col, xRow, zCol)) { return false; } - loc.SetCoordinates(xRow, 0., zCol); + loc.SetCoordinates(xRow, NominalYShift, zCol); return true; } @@ -180,28 +198,23 @@ class SegmentationSuperAlpide { float xRow{0.}, zCol{0.}; detectorToLocalUnchecked(row, col, xRow, zCol); - loc.SetCoordinates(xRow, 0., zCol); + loc.SetCoordinates(xRow, NominalYShift, zCol); } private: template - [[nodiscard]] bool isValid(T const row, T const col) const noexcept + [[nodiscard]] constexpr bool isValid(T const row, T const col) const noexcept { if constexpr (std::is_floating_point_v) { // compares in local coord. - namespace cp = constants::pixelarray; - return !static_cast(row <= -cp::width / 2. || cp::width / 2. <= row || col <= -cp::length / 2. || cp::length / 2. 
<= col); + return (-WidthH < row && row < WidthH && -LengthH < col && col < LengthH); } else { // compares in rows/cols - return !static_cast(row < 0 || row >= static_cast(mNRows) || col < 0 || col >= static_cast(mNCols)); + return !static_cast(row < 0 || row >= static_cast(NRows) || col < 0 || col >= static_cast(NCols)); } } - const int mLayer{0}; ///< chip layer - - ClassDef(SegmentationSuperAlpide, 1); + float mRadius; }; -/// Segmentation array -extern const std::array SuperSegmentations; } // namespace o2::its3 #endif diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h index d3efde58d0e0d..fedaad9182cce 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h +++ b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h @@ -21,114 +21,164 @@ #include +// This files defines the design specifications of the chip. +// Each TGeoShape has the following properties +// length: dimension in z-axis +// width: dimension in xy-axes +// color: for visulisation namespace o2::its3::constants { -constexpr float cm{1e+2}; // This is the default unit of TGeo so we use this as scale -constexpr float mu{1e-6 * cm}; -constexpr float mm{1e-3 * cm}; +constexpr double cm{1e+2}; // This is the default unit of TGeo so we use this as scale +constexpr double mu{1e-6 * cm}; +constexpr double mm{1e-3 * cm}; namespace pixelarray { -constexpr float width{9.197 * mm}; -constexpr float length{3.571 * mm}; +constexpr double width{9.197 * mm}; +constexpr double length{3.571 * mm}; constexpr int nCols{156}; constexpr int nRows{442}; constexpr int nPixels{nRows * nCols}; constexpr EColor color{kGreen}; -constexpr float area{width * length}; +constexpr double area{width * length}; } // namespace pixelarray namespace tile { namespace biasing { -constexpr float width{0.06 * mm}; -constexpr float length{3.571 * mm}; +constexpr double width{0.06 * mm}; +constexpr double length{3.571 * mm}; constexpr EColor 
color{kYellow}; static_assert(length == pixelarray::length); } // namespace biasing namespace powerswitches { -constexpr float width{9.257 * mm}; -constexpr float length{0.02 * mm}; -constexpr float z{pixelarray::width}; +constexpr double width{9.257 * mm}; +constexpr double length{0.02 * mm}; +constexpr double z{pixelarray::width}; constexpr EColor color{kBlue}; } // namespace powerswitches namespace readout { -constexpr float width{0.525 * mm}; -constexpr float length{3.591 * mm}; +constexpr double width{0.525 * mm}; +constexpr double length{3.591 * mm}; constexpr EColor color{kMagenta}; static_assert(length == (biasing::length + powerswitches::length)); } // namespace readout -constexpr float length{readout::length}; -constexpr float width{powerswitches::width + readout::width}; +constexpr double length{readout::length}; +constexpr double width{powerswitches::width + readout::width}; } // namespace tile namespace rsu { namespace databackbone { -constexpr float width{9.782 * mm}; -constexpr float length{0.06 * mm}; +constexpr double width{9.782 * mm}; +constexpr double length{0.06 * mm}; constexpr EColor color{kRed}; } // namespace databackbone -constexpr float width{19.564 * mm}; -constexpr float length{21.666 * mm}; +constexpr double width{19.564 * mm}; +constexpr double length{21.666 * mm}; constexpr unsigned int nTiles{12}; } // namespace rsu namespace segment { -constexpr float width{rsu::width}; +constexpr double width{rsu::width}; namespace lec { -constexpr float width{segment::width}; -constexpr float length{4.5 * mm}; +constexpr double width{segment::width}; +constexpr double length{4.5 * mm}; constexpr EColor color{kCyan}; } // namespace lec namespace rec { -constexpr float width{segment::width}; -constexpr float length{1.5 * mm}; +constexpr double width{segment::width}; +constexpr double length{1.5 * mm}; constexpr EColor color{kCyan}; } // namespace rec constexpr unsigned int nRSUs{12}; constexpr unsigned int nTilesPerSegment{nRSUs * rsu::nTiles}; 
-constexpr float length{nRSUs * rsu::length + lec::length + rec::length}; -constexpr float lengthSensitive{nRSUs * rsu::length}; +constexpr double length{(nRSUs * rsu::length) + lec::length + rec::length}; +constexpr double lengthSensitive{nRSUs * rsu::length}; } // namespace segment namespace carbonfoam { // TODO: Waiting for the further information from WP5(Corrado) -constexpr float longeronsWidth{2.0 * mm}; // what is the height of the longerons? -constexpr float longeronsLength{263 * mm}; // from blueprint -constexpr float HringLength{6.0 * mm}; // from blueprint -constexpr float edgeBetwChipAndFoam{1.0 * mm}; // from blueprint but not used cause forms are already overlapping -constexpr float gapBetwHringsLongerons{0.05 * mm}; // from blueprint -constexpr std::array nHoles{11, 11, 11}; // how many holes for each layer? -constexpr std::array radiusHoles{1.0 * mm, 1.0 * mm, 2.0 * mm}; // what is the radius of the holes for each layer? +constexpr double longeronsWidth{2.0 * mm}; // what is the height of the longerons? +constexpr double longeronsLength{263 * mm}; // from blueprint +constexpr double HringLength{6.0 * mm}; // from blueprint +constexpr double edgeBetwChipAndFoam{1.0 * mm}; // from blueprint but not used cause forms are already overlapping +constexpr double gapBetwHringsLongerons{0.05 * mm}; // from blueprint +constexpr std::array nHoles{11, 11, 11}; // how many holes for each layer? +constexpr std::array radiusHoles{1.0 * mm, 1.0 * mm, 2.0 * mm}; // what is the radius of the holes for each layer? 
constexpr EColor color{kGray}; } // namespace carbonfoam +namespace metalstack +{ +constexpr double thickness{5 * mu}; // physical thickness of the copper metal stack +constexpr double length{segment::length}; +constexpr double width{segment::width}; +constexpr EColor color{kBlack}; +} // namespace metalstack +namespace silicon +{ +constexpr double thickness{45 * mu}; // thickness of silicon +constexpr double thicknessIn{(thickness + metalstack::thickness) / 2.}; // inner silicon thickness +constexpr double thicknessOut{(thickness - metalstack::thickness) / 2.}; // outer silicon thickness +} // namespace silicon constexpr unsigned int nLayers{3}; constexpr unsigned int nTotLayers{7}; constexpr unsigned int nSensorsIB{2 * nLayers}; -constexpr float equatorialGap{1 * mm}; +constexpr double equatorialGap{1 * mm}; constexpr std::array nSegments{3, 4, 5}; -constexpr float thickness{50 * mu}; //< Physical Thickness of chip -constexpr float effThickness{66 * mu}; //< Physical thickness + metal substrate -constexpr std::array radii{19.0006 * mm, 25.228 * mm, 31.4554 * mm}; // middle radius e.g. inner radius+thickness/2. 
-constexpr std::array radiiInner{radii[0] - thickness / 2.f, radii[1] - thickness / 2.f, radii[2] - thickness / 2.f}; // inner radius -constexpr std::array radiiOuter{radii[0] + thickness / 2.f, radii[1] + thickness / 2.f, radii[2] + thickness / 2.f}; // inner radius +constexpr double totalThickness{silicon::thickness + metalstack::thickness}; // total chip thickness +constexpr std::array radii{19.0006 * mm, 25.228 * mm, 31.4554 * mm}; // nominal radius +constexpr std::array radiiInner{radii[0] - silicon::thicknessIn, radii[1] - silicon::thicknessIn, radii[2] - silicon::thicknessIn}; // inner silicon radius +constexpr std::array radiiOuter{radii[0] + silicon::thicknessOut, radii[1] + silicon::thicknessOut, radii[2] + silicon::thicknessOut}; // outer silicon radius +constexpr std::array radiiMiddle{(radiiInner[0] + radiiOuter[0]) / 2., (radiiInner[1] + radiiOuter[1]) / 2., (radiiInner[2] + radiiOuter[2]) / 2.}; // middle silicon radius +constexpr double nominalYShift{-metalstack::thickness / 2.}; // shift to position in silicon volume to the chip volume (silicon+metalstack) + +// extra information of pixels and their response functions +namespace pixelarray::pixels +{ +namespace mosaix +{ +constexpr double pitchX{width / static_cast(nRows)}; +constexpr double pitchZ{length / static_cast(nCols)}; +} // namespace mosaix +namespace apts +{ +constexpr double pitchX{15.0 * mu}; +constexpr double pitchZ{15.0 * mu}; +constexpr double responseUpperLimit{10 * mu}; +constexpr double responseYShift{responseUpperLimit - silicon::thicknessOut}; +} // namespace apts +namespace moss +{ +namespace top +{ +constexpr double pitchX{22.5 * mu}; +constexpr double pitchZ{22.5 * mu}; +} // namespace top +namespace bot +{ +constexpr double pitchX{18.0 * mu}; +constexpr double pitchZ{18.0 * mu}; +} // namespace bot +} // namespace moss +} // namespace pixelarray::pixels + namespace detID { -constexpr unsigned int mDetIDs{2 * 12 * 12 * 12}; //< 2 Hemispheres * (3,4,5=12 segments in a layer)
* 12 RSUs in a segment * 12 Tiles in a RSU -constexpr unsigned int l0IDStart{0}; //< Start DetID layer 0 -constexpr unsigned int l0IDEnd{2 * 3 * 12 * 12 - 1}; //< End First DetID layer 0; inclusive range -constexpr unsigned int l0IDTot{2 * 3 * 12 * 12}; //< Total DetID in Layer 0 -constexpr unsigned int l1IDStart{l0IDEnd + 1}; //< Start DetID layer 1 -constexpr unsigned int l1IDEnd{l1IDStart + 2 * 4 * 12 * 12 - 1}; //< End First DetID layer 1; inclusive range -constexpr unsigned int l1IDTot{2 * 4 * 12 * 12}; //< Total DetID in Layer 1 -constexpr unsigned int l2IDStart{l1IDEnd + 1}; //< Start DetID layer 2 -constexpr unsigned int l2IDEnd{l2IDStart + 2 * 5 * 12 * 12 - 1}; //< End First DetID layer 2; inclusive range -constexpr unsigned int l2IDTot{2 * 5 * 12 * 12}; //< Total DetID in Layer 2 -constexpr unsigned int nChips{l2IDEnd + 1}; //< number of Chips (PixelArrays) in IB +constexpr unsigned int mDetIDs{2 * 12 * 12 * 12}; //< 2 Hemispheres * (3,4,5=12 segments in a layer) * 12 RSUs in a segment * 12 Tiles in a RSU +constexpr unsigned int l0IDStart{0}; //< Start DetID layer 0 +constexpr unsigned int l0IDEnd{(2 * 3 * 12 * 12) - 1}; //< End First DetID layer 0; inclusive range +constexpr unsigned int l0IDTot{2 * 3 * 12 * 12}; //< Total DetID in Layer 0 +constexpr unsigned int l1IDStart{l0IDEnd + 1}; //< Start DetID layer 1 +constexpr unsigned int l1IDEnd{l1IDStart + (2 * 4 * 12 * 12) - 1}; //< End First DetID layer 1; inclusive range +constexpr unsigned int l1IDTot{2 * 4 * 12 * 12}; //< Total DetID in Layer 1 +constexpr unsigned int l2IDStart{l1IDEnd + 1}; //< Start DetID layer 2 +constexpr unsigned int l2IDEnd{l2IDStart + (2 * 5 * 12 * 12) - 1}; //< End First DetID layer 2; inclusive range +constexpr unsigned int l2IDTot{2 * 5 * 12 * 12}; //< Total DetID in Layer 2 +constexpr unsigned int nChips{l2IDEnd + 1}; //< number of Chips (PixelArrays) in IB template inline T getDetID2Layer(T detID) diff --git a/Detectors/Upgrades/ITS3/base/src/ITS3BaseLinkDef.h 
b/Detectors/Upgrades/ITS3/base/src/ITS3BaseLinkDef.h index dc0557824e0f8..144711b052a1b 100644 --- a/Detectors/Upgrades/ITS3/base/src/ITS3BaseLinkDef.h +++ b/Detectors/Upgrades/ITS3/base/src/ITS3BaseLinkDef.h @@ -15,7 +15,6 @@ #pragma link off all classes; #pragma link off all functions; -#pragma link C++ class o2::its3::SegmentationSuperAlpide + ; #pragma link C++ class o2::its3::ITS3Params + ; #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its3::ITS3Params> + ; diff --git a/Detectors/Upgrades/ITS3/base/src/SegmentationSuperAlpide.cxx b/Detectors/Upgrades/ITS3/base/src/SegmentationSuperAlpide.cxx deleted file mode 100644 index 26ca09f351bec..0000000000000 --- a/Detectors/Upgrades/ITS3/base/src/SegmentationSuperAlpide.cxx +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#include "ITS3Base/SegmentationSuperAlpide.h" - -ClassImp(o2::its3::SegmentationSuperAlpide); - -namespace o2::its3 -{ - -const std::array SuperSegmentations{0, 1, 2}; -} diff --git a/Detectors/Upgrades/ITS3/data/CMakeLists.txt b/Detectors/Upgrades/ITS3/data/CMakeLists.txt new file mode 100644 index 0000000000000..ba8b60c8aa7eb --- /dev/null +++ b/Detectors/Upgrades/ITS3/data/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. 
+# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. + +add_custom_target( + GenerateAPTSResponse ALL + COMMAND + ${CMAKE_BINARY_DIR}/stage/bin/o2-alpide-response-generator -c APTS -i + ${ITSRESPONSE_DIR}/response/ITS3ChipResponseData/AptsResponseData/ -o + ${CMAKE_CURRENT_BINARY_DIR}/ + BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root + DEPENDS GenerateAlpideResponse + COMMENT "Generating APTSResponseData.root") +install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root" + DESTINATION + "${CMAKE_INSTALL_PREFIX}/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/" +) diff --git a/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt b/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt index bdd0329c55ecd..39e435f0ba2e6 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt @@ -19,8 +19,8 @@ its3_add_macro(CheckHits.C) its3_add_macro(CheckDigitsDensity.C) its3_add_macro(CheckClusterSize.C) its3_add_macro(CompareClusterSize.C) -its3_add_macro(CheckSuperAlpideSegment.C) -its3_add_macro(CheckSuperAlpideSegmentTrans.C) +its3_add_macro(CheckMosaixSegment.C) +its3_add_macro(CheckMosaixSegmentTrans.C) its3_add_macro(CompareClustersAndDigits.C) its3_add_macro(CheckROFs.C) its3_add_macro(CheckTileNumbering.C) diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckClusterSize.C b/Detectors/Upgrades/ITS3/macros/test/CheckClusterSize.C index addaaf47269d2..564b20350b883 100755 --- a/Detectors/Upgrades/ITS3/macros/test/CheckClusterSize.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckClusterSize.C @@ -43,6 +43,7 @@ #include "SimulationDataFormat/MCCompLabel.h" #include 
"SimulationDataFormat/MCEventHeader.h" #include "SimulationDataFormat/MCTrack.h" +#include "ITS3Base/SpecsV2.h" #endif #define ENABLE_UPGRADES #include "SimulationDataFormat/MCTruthContainer.h" @@ -65,7 +66,11 @@ void checkFile(const std::unique_ptr& file); inline auto hist_map(unsigned short id) { - return std::clamp(id, static_cast(0), static_cast(6)) / 2; + int lay = o2::its3::constants::detID::getDetID2Layer(id); + if (lay == -1) { + return nLayers - 1; + } + return lay; } void CheckClusterSize(std::string clusFileName = "o2clus_its.root", @@ -133,7 +138,7 @@ void CheckClusterSize(std::string clusFileName = "o2clus_its.root", std::vector hOtherSecondaryEta; std::vector hOtherSecondaryPt; std::vector hOtherSecondaryPhi; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < nLayers; ++i) { hPrimary.emplace_back(Form("primary/L%d", i), Form("L%d Primary Cluster Size", i), maxClusterSize, 0, maxClusterSize); hPrimaryEta.emplace_back(Form("primary/EtaL%d", i), Form("L%d Primary Cluster Size vs Eta", i), maxClusterSize, 0, maxClusterSize, 100, -3.0, 3.0); hPrimaryPt.emplace_back(Form("primary/Pt%d", i), Form("L%d Primary Cluster Size vs Pt", i), maxClusterSize, 0, maxClusterSize, 100, 0.0, 10.0); @@ -238,35 +243,39 @@ void CheckClusterSize(std::string clusFileName = "o2clus_its.root", int nROFRec = (int)rofRecVec.size(); auto pattIt = patternsPtr->cbegin(); + int cInvalid{0}, cGood{0}; for (int irof = 0; irof < nROFRec; irof++) { const auto& rofRec = rofRecVec[irof]; - // rofRec.print(); + /*rofRec.print();*/ for (int icl = 0; icl < rofRec.getNEntries(); icl++) { int clEntry = rofRec.getFirstEntry() + icl; const auto& cluster = clusArr[clEntry]; - // cluster.print(); + /*cluster.print();*/ auto pattId = cluster.getPatternID(); auto id = cluster.getSensorID(); + auto ib = o2::its3::constants::detID::isDetITS3(id); int clusterSize{-1}; - if (pattId == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattId)) { + if (pattId == 
o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattId, ib)) { o2::itsmft::ClusterPattern patt(pattIt); clusterSize = patt.getNPixels(); continue; } else { - clusterSize = dict.getNpixels(pattId); + clusterSize = dict.getNpixels(pattId, ib); } const auto& label = (clusLabArr->getLabels(clEntry))[0]; if (!label.isValid() || label.getSourceID() != 0 || !label.isCorrect()) { + ++cInvalid; continue; } + ++cGood; const int trackID = label.getTrackID(); int evID = label.getEventID(); const auto& pInfo = info[evID][trackID]; - if (id > 6) { + if (!o2::its3::constants::detID::isDetITS3(id)) { hOuterBarrel.Fill(clusterSize); } @@ -332,6 +341,7 @@ void CheckClusterSize(std::string clusFileName = "o2clus_its.root", } } } + std::cout << "Good labels: " << cGood << "; invalid: " << cInvalid << '\n'; std::cout << "Done measuring cluster sizes:" << std::endl; for (int i = 0; i < nLayers; ++i) { std::cout << "* Layer " << i << ":\n"; diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckClustersITS3.C b/Detectors/Upgrades/ITS3/macros/test/CheckClustersITS3.C index af03ed7a9877b..006271a1ea7bd 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckClustersITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckClustersITS3.C @@ -25,7 +25,7 @@ #define ENABLE_UPGRADES #include "DetectorsCommonDataFormats/DetID.h" #include "ITSMFTBase/SegmentationAlpide.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITS3Base/SpecsV2.h" #include "ITSBase/GeometryTGeo.h" #include "DataFormatsITSMFT/CompCluster.h" @@ -50,22 +50,24 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", using namespace o2::base; using namespace o2::its; - using SuperSegmentation = o2::its3::SegmentationSuperAlpide; + using MosaixSegmentation = o2::its3::SegmentationMosaix; using Segmentation = o2::itsmft::SegmentationAlpide; using o2::itsmft::CompClusterExt; using o2::itsmft::Hit; using ROFRec = o2::itsmft::ROFRecord; using MC2ROF = 
o2::itsmft::MC2ROFRecord; using HitVec = std::vector; - using MC2HITS_map = std::unordered_map; // maps (track_ID<<16 + chip_ID) to entry in the hit vector + using MC2HITS_map = std::unordered_map; // maps (track_ID<<32 + chip_ID) to entry in the hit vector + std::array mMosaixSegmentations{0, 1, 2}; std::vector hitVecPool; std::vector mc2hitVec; - ULong_t cPattValid{0}, cPattInvalid{0}, cLabelInvalid{0}, cNoMC{0}; + ULong_t cPattValidIB{0}, cPattInvalidIB{0}, cLabelInvalidIB{0}, cNoMCIB{0}; + ULong_t cPattValidOB{0}, cPattInvalidOB{0}, cLabelInvalidOB{0}, cNoMCOB{0}; TFile fout("CheckClusters.root", "recreate"); - TNtuple nt("ntc", "cluster ntuple", "ev:lab:hlx:hlz:hgx:hgz:tx:tz:cgx:cgy:cgz:clx:cly:clz:dx:dy:dz:ex:ez:patid:rof:npx:id"); + TNtuple nt("ntc", "cluster ntuple", "ev:lab:hlx:hlz:hgx:hgz:tx:tz:cgx:cgy:cgz:clx:cly:clz:dx:dy:dz:ex:ez:patid:rof:npx:id:eta:row:col:lay"); // Geometry o2::base::GeometryManager::loadGeometry(inputGeom); @@ -102,6 +104,7 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", } else { LOG(info) << "Running without dictionary !"; } + dict.print(); // ROFrecords std::vector rofRecVec, *rofRecVecP = &rofRecVec; @@ -174,20 +177,18 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", auto isIB = o2::its3::constants::detID::isDetITS3(chipID); auto layer = o2::its3::constants::detID::getDetID2Layer(chipID); auto clusterSize{-1}; - if (pattID == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID)) { + if (pattID == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID, isIB)) { o2::itsmft::ClusterPattern patt(pattIt); locC = dict.getClusterCoordinates(cluster, patt, false); LOGP(debug, "I am invalid and I am on chip {}", chipID); - ++cPattInvalid; + (isIB) ? ++cPattInvalidIB : ++cPattInvalidOB; continue; } else { locC = dict.getClusterCoordinates(cluster); - errX = dict.getErrX(pattID); - errZ = dict.getErrZ(pattID); - errX *= (isIB) ? 
SuperSegmentation::mPitchRow : Segmentation::PitchRow; - errZ *= (isIB) ? SuperSegmentation::mPitchCol : Segmentation::PitchCol; - npix = dict.getNpixels(pattID); - ++cPattValid; + errX = dict.getErrX(pattID, isIB); + errZ = dict.getErrZ(pattID, isIB); + npix = dict.getNpixels(pattID, isIB); + (isIB) ? ++cPattValidIB : ++cPattValidOB; } // Transformation to the local --> global @@ -195,7 +196,7 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", const auto& lab = (clusLabArr->getLabels(clEntry))[0]; if (!lab.isValid()) { - ++cLabelInvalid; + (isIB) ? ++cLabelInvalidIB : ++cLabelInvalidOB; continue; } @@ -207,7 +208,7 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", auto hitEntry = mc2hit.find(key); if (hitEntry == mc2hit.end()) { LOG(debug) << "Failed to find MC hit entry for Tr" << trID << " chipID" << chipID; - ++cNoMC; + (isIB) ? ++cNoMCIB : ++cNoMCOB; continue; } const auto& hit = (*hitArray)[hitEntry->second]; @@ -234,25 +235,22 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", } else { // compare in local flat coordinates float xFlatEnd{0.}, yFlatEnd{0.}; - o2::its3::SuperSegmentations[layer].curvedToFlat(locH.X(), locH.Y(), xFlatEnd, yFlatEnd); + mMosaixSegmentations[layer].curvedToFlat(locH.X(), locH.Y(), xFlatEnd, yFlatEnd); locH.SetXYZ(xFlatEnd, yFlatEnd, locH.Z()); float xFlatSta{0.}, yFlatSta{0.}; - o2::its3::SuperSegmentations[layer].curvedToFlat(locHsta.X(), locHsta.Y(), xFlatSta, yFlatSta); + mMosaixSegmentations[layer].curvedToFlat(locHsta.X(), locHsta.Y(), xFlatSta, yFlatSta); locHsta.SetXYZ(xFlatSta, yFlatSta, locHsta.Z()); - // recalculate x/y in flat - // x0 = xFlatSta, dltx = xFlatEnd - x0; - // y0 = yFlatSta, dlty = yFlatEnd - y0; - // r = (0.5 * (SuperSegmentation::mSensorLayerThickness - SuperSegmentation::mSensorLayerThicknessEff) - y0) / dlty; - // locH.SetXYZ(x0 + r * dltx, y0 + r * dlty, z0 + r * dltz); // not really precise, but okish locH.SetXYZ(0.5f * (locH.X() 
+ locHsta.X()), 0.5f * (locH.Y() + locHsta.Y()), 0.5f * (locH.Z() + locHsta.Z())); - o2::its3::SuperSegmentations[layer].curvedToFlat(locC.X(), locC.Y(), xFlatSta, yFlatSta); + mMosaixSegmentations[layer].curvedToFlat(locC.X(), locC.Y(), xFlatSta, yFlatSta); locC.SetXYZ(xFlatSta, yFlatSta, locC.Z()); } + float theta = std::acos(gloC.Z() / gloC.R()); /* polar angle: divide z by the full 3D distance R() so the acos argument stays within [-1, 1] */ + float eta = -std::log(std::tan(theta / 2)); - std::array data = {(float)lab.getEventID(), (float)trID, + std::array data = {(float)lab.getEventID(), (float)trID, locH.X(), locH.Z(), gloH.X(), gloH.Z(), dltx / dlty, dltz / dlty, @@ -260,13 +258,15 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", locC.X(), locC.Y(), locC.Z(), locC.X() - locH.X(), locC.Y() - locH.Y(), locC.Z() - locH.Z(), errX, errZ, (float)pattID, - (float)rofRec.getROFrame(), (float)npix, (float)chipID}; + (float)rofRec.getROFrame(), (float)npix, (float)chipID, eta, (float)cluster.getRow(), (float)cluster.getCol(), (float)layer}; nt.Fill(data.data()); } } - LOGP(info, "There were {} valid PatternIDs and {} ({:.1f}%) invalid ones", cPattValid, cPattInvalid, ((float)cPattInvalid / (float)(cPattInvalid + cPattValid)) * 100); - LOGP(info, "There were {} invalid Labels and {} with No MC Hit information ", cLabelInvalid, cNoMC); + LOGP(info, "IB {} valid PatternIDs and {} ({:.1f}%) invalid ones", cPattValidIB, cPattInvalidIB, ((float)cPattInvalidIB / (float)(cPattInvalidIB + cPattValidIB)) * 100); + LOGP(info, "IB {} invalid Labels and {} with No MC Hit information ", cLabelInvalidIB, cNoMCIB); + LOGP(info, "OB {} valid PatternIDs and {} ({:.1f}%) invalid ones", cPattValidOB, cPattInvalidOB, ((float)cPattInvalidOB / (float)(cPattInvalidOB + cPattValidOB)) * 100); + LOGP(info, "OB {} invalid Labels and {} with No MC Hit information ", cLabelInvalidOB, cNoMCOB); auto canvCgXCgY = new TCanvas("canvCgXCgY", "", 1600, 1600); canvCgXCgY->Divide(2, 2); @@ -292,6 +292,18 @@ void CheckClustersITS3(const std::string& clusfile =
"o2clus_its.root", nt.Draw("dx:dz>>h_dx_vs_dz_OB_z(1000, -0.01, 0.01, 1000, -0.01, 0.01)", "id >= 3456 && abs(cgz) < 2", "colz"); canvdXdZ->SaveAs("it3clusters_dx_vs_dz.pdf"); + auto canvCHXZ = new TCanvas("canvCHXZ", "", 1600, 1600); + canvCHXZ->Divide(2, 2); + canvCHXZ->cd(1); + nt.Draw("(cgx-hgx)*10000:eta>>h_chx_IB(101,-1.4,1.4,101,-50,50)", "id<3456", "prof"); + canvCHXZ->cd(2); + nt.Draw("(cgx-hgx)*10000:eta>>h_chx_OB(101,-1.4,1.4,101,-50,50)", "id>=3456", "prof"); + canvCHXZ->cd(3); + nt.Draw("(cgz-hgz)*10000:eta>>h_chz_IB(101,-1.4,1.4,101,-50,50)", "id<3456", "prof"); + canvCHXZ->cd(4); + nt.Draw("(cgz-hgz)*10000:eta>>h_chz_OB(101,-1.4,1.4,101,-50,50)", "id>=3456", "prof"); + canvCHXZ->SaveAs("it3clusters_xz_eta.pdf"); /* save the residual-vs-eta canvas filled just above */ + auto c1 = new TCanvas("p1", "pullX"); c1->cd(); c1->SetLogy(); diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsDensity.C b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsDensity.C index 0c8d9c3bdfbec..67b75e33bc430 100755 --- a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsDensity.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsDensity.C @@ -37,7 +37,7 @@ #include "ITS3Base/SpecsV2.h" #include "CommonConstants/MathConstants.h" #include "DataFormatsITSMFT/Digit.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "DetectorsBase/GeometryManager.h" #include "ITSBase/GeometryTGeo.h" #include "fairlogger/Logger.h" @@ -56,7 +56,7 @@ constexpr double qedRate = qedXSection / hadXSection * interaction_rate; // Hz constexpr double qedFactor = qedRate * integration_time; // a.u.
using o2::itsmft::Digit; namespace its3 = o2::its3; -using SSAlpide = its3::SegmentationSuperAlpide; +using Mosaix = its3::SegmentationMosaix; void checkFile(const std::unique_ptr& file); @@ -64,7 +64,7 @@ void CheckDigitsDensity(int nEvents = 10000, std::string digitFileName = "it3dig { gROOT->SetBatch(batch); LOGP(debug, "Checking Digit ITS3 Density"); - // Vars + std::array mMosaixSegmentations{0, 1, 2}; // Geometry o2::base::GeometryManager::loadGeometry(geomFileName); @@ -80,8 +80,8 @@ void CheckDigitsDensity(int nEvents = 10000, std::string digitFileName = "it3dig digitTree->SetBranchAddress("IT3Digit", &digitArrayPtr); std::array hists; for (int i{3}; i--;) { - double rmin = its3::constants::radii[i] - its3::constants::thickness; - double rmax = its3::constants::radii[i] + its3::constants::thickness; + double rmin = its3::constants::radiiInner[i]; + double rmax = its3::constants::radiiOuter[i]; hists[i] = new TH2F(Form("h_digits_dens_L%d", i), Form("Digit Density L%d in %d Events; Z_{Glo} [cm]; R_{Glo} [cm]", i, nEvents), 100, -15, 15, 100, rmin, rmax); } @@ -103,8 +103,8 @@ void CheckDigitsDensity(int nEvents = 10000, std::string digitFileName = "it3dig // goto curved coordinates float x{0.f}, y{0.f}, z{0.f}; float xFlat{0.f}, yFlat{0.f}; - its3::SuperSegmentations[layer].detectorToLocal(row, col, xFlat, z); - its3::SuperSegmentations[layer].flatToCurved(xFlat, 0., x, y); + mMosaixSegmentations[layer].detectorToLocal(row, col, xFlat, z); + mMosaixSegmentations[layer].flatToCurved(xFlat, 0., x, y); const o2::math_utils::Point3D locD(x, y, z); const auto gloD = gman->getMatrixL2G(id)(locD); // convert to global const auto R = std::hypot(gloD.X(), gloD.Y()); @@ -115,7 +115,7 @@ void CheckDigitsDensity(int nEvents = 10000, std::string digitFileName = "it3dig std::unique_ptr oFile(TFile::Open("checkDigitsDensity.root", "RECREATE")); checkFile(oFile); for (const auto& h : hists) { - h->Scale(1. 
/ (SSAlpide::mPitchCol * SSAlpide::mPitchRow * nEvents)); + h->Scale(1. / (Mosaix::PitchCol * Mosaix::PitchRow * nEvents)); h->ProjectionX()->Write(); h->Write(); } diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C index 16aa3adc8101c..1dc4a4e2d6b47 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C @@ -27,7 +27,7 @@ #define ENABLE_UPGRADES #include "ITSBase/GeometryTGeo.h" #include "DataFormatsITSMFT/Digit.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITSMFTBase/SegmentationAlpide.h" #include "ITSMFTSimulation/Hit.h" #include "MathUtils/Utils.h" @@ -51,6 +51,7 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil using o2::itsmft::Hit; using o2::itsmft::SegmentationAlpide; + std::array mMosaixSegmentations{0, 1, 2}; TFile* f = TFile::Open("CheckDigits.root", "recreate"); TNtuple* nt = new TNtuple("ntd", "digit ntuple", "id:x:y:z:rowD:colD:rowH:colH:xlH:zlH:xlcH:zlcH:dx:dz"); @@ -165,8 +166,8 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil if (isIB) { // ITS3 IB float xFlat{0.f}, yFlat{0.f}; - its3::SuperSegmentations[layer].detectorToLocal(ix, iz, xFlat, z); - its3::SuperSegmentations[layer].flatToCurved(xFlat, 0., x, y); + mMosaixSegmentations[layer].detectorToLocal(ix, iz, xFlat, z); + mMosaixSegmentations[layer].flatToCurved(xFlat, 0., x, y); } else { // ITS2 OB SegmentationAlpide::detectorToLocal(ix, iz, x, z); @@ -184,7 +185,7 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil const auto* mc2hit = &mc2hitVec[lab.getEventID()]; const auto& hitEntry = mc2hit->find(key); if (hitEntry == mc2hit->end()) { - LOGP(error, "Failed to find MC hit entry for Tr {} chipID {}", trID, chipID); + LOGP(debug, "Failed to find MC hit entry for Tr {} chipID {}", trID, chipID); 
continue; } @@ -196,18 +197,18 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil auto xyzLocE = gman->getMatrixL2G(chipID) ^ (hit.GetPos()); // inverse conversion from global to local auto xyzLocS = gman->getMatrixL2G(chipID) ^ (hit.GetPosStart()); o2::math_utils::Vector3D xyzLocM; - xyzLocM.SetCoordinates(0.5 * (xyzLocE.X() + xyzLocS.X()), 0.5 * (xyzLocE.Y() + xyzLocS.Y()), 0.5 * (xyzLocE.Z() + xyzLocS.Z())); + xyzLocM.SetCoordinates(0.5f * (xyzLocE.X() + xyzLocS.X()), 0.5f * (xyzLocE.Y() + xyzLocS.Y()), 0.5f * (xyzLocE.Z() + xyzLocS.Z())); float xlc = 0., zlc = 0.; int row = 0, col = 0; if (isIB) { float xFlat{0.}, yFlat{0.}; - its3::SuperSegmentations[layer].curvedToFlat(xyzLocM.X(), xyzLocM.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(xyzLocM.X(), xyzLocM.Y(), xFlat, yFlat); xyzLocM.SetCoordinates(xFlat, yFlat, xyzLocM.Z()); - its3::SuperSegmentations[layer].curvedToFlat(locD.X(), locD.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(locD.X(), locD.Y(), xFlat, yFlat); locD.SetCoordinates(xFlat, yFlat, locD.Z()); - if (auto v1 = !its3::SuperSegmentations[layer].localToDetector(xyzLocM.X(), xyzLocM.Z(), row, col), - v2 = !its3::SuperSegmentations[layer].detectorToLocal(row, col, xlc, zlc); + if (auto v1 = !mMosaixSegmentations[layer].localToDetector(xyzLocM.X(), xyzLocM.Z(), row, col), + v2 = !mMosaixSegmentations[layer].detectorToLocal(row, col, xlc, zlc); v1 || v2) { continue; } @@ -223,7 +224,7 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil (isIB) ? 
++nDigitFilledIB : ++nDigitFilledOB; } // end loop on digits array - } // end loop on ROFRecords array + } // end loop on ROFRecords array auto canvXY = new TCanvas("canvXY", "", 1600, 1600); canvXY->Divide(2, 2); diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckHits.C b/Detectors/Upgrades/ITS3/macros/test/CheckHits.C index 7833b7c205f4a..00ac0a992ba39 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckHits.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckHits.C @@ -31,7 +31,6 @@ #define ENABLE_UPGRADES #include "CommonConstants/MathConstants.h" -#include "ITS3Base/SegmentationSuperAlpide.h" #include "ITS3Base/SpecsV2.h" #include "ITSMFTSimulation/Hit.h" #include "SimulationDataFormat/MCTrack.h" @@ -39,7 +38,6 @@ namespace it3c = o2::its3::constants; namespace it3d = it3c::detID; -using SSAlpide = o2::its3::SegmentationSuperAlpide; using o2::itsmft::Hit; constexpr double interaction_rate = 50e3; // Hz diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegment.C b/Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegment.C similarity index 78% rename from Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegment.C rename to Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegment.C index 76ac02959415d..12e1ab3a7280d 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegment.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegment.C @@ -9,9 +9,6 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file CheckTracksITS3.C -/// \brief Simple macro to check ITS3 tracks - #if !defined(__CLING__) || defined(__ROOTCLING__) #include "Rtypes.h" @@ -24,14 +21,6 @@ #include "TGLViewer.h" #include "TMath.h" -#include "TEveGeoNode.h" -#include "TEveManager.h" -#include "TEveViewer.h" -#include "TEvePointSet.h" -#include "TEveTrackPropagator.h" -#include "TEveTrack.h" -#include "TEveVSDStructs.h" - #include "TFile.h" #include "TGraph.h" #include "TH1D.h" @@ -49,40 +38,41 @@ #include "MathUtils/Cartesian.h" #include "ITS3Base/SpecsV2.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITSBase/GeometryTGeo.h" #endif using gITS = o2::its::GeometryTGeo; -void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, - bool isTestFlatToCurved = false, - bool isTestLocalToGlobal = false) +void CheckMosaixSegment(bool isTestDetectorToLocal = false, + bool isTestFlatToCurved = false, + bool isTestLocalToGlobal = false) { using namespace o2::its3; - static constexpr unsigned int mNCols{SegmentationSuperAlpide::mNCols}; - static constexpr unsigned int mNRows{SegmentationSuperAlpide::mNRows}; + static constexpr unsigned int mNCols{SegmentationMosaix::NCols}; + static constexpr unsigned int mNRows{SegmentationMosaix::NRows}; static constexpr unsigned int nPixels{mNCols * mNRows}; + std::array mMosaixSegmentations{0, 1, 2}; if (isTestDetectorToLocal || isTestFlatToCurved) { namespace cp = constants::pixelarray; - TH2I* h_raw_col = new TH2I("h_raw_col", "raws and cols sown;raw;col", mNRows, 0, mNRows, mNCols, 0, mNCols); - TH2D* h_xLocal_zLocal = new TH2D("h_xLocal_zLocal", "x and z from raws and cols;xLocal;zLocal", mNRows, -cp::length / 2, cp::length / 2, mNCols, -cp::width / 2, cp::width / 2); - TH2I* h_raw_col_translate = new TH2I("h_raw_col_translate", "raws and cols from x and z;raw;col", mNRows, 0, mNRows, mNCols, 0, mNCols); - TGraph* g_raw_xLocal = new TGraph(); - g_raw_xLocal->SetMarkerStyle(20); - 
g_raw_xLocal->SetMarkerSize(0.2); + TH2I* h_row_col = new TH2I("h_row_col", "rows and cols sown;row;col", mNRows, 0, mNRows, mNCols, 0, mNCols); + TH2D* h_xLocal_zLocal = new TH2D("h_xLocal_zLocal", "x and z from rows and cols;xLocal;zLocal", mNRows, -cp::length / 2, cp::length / 2, mNCols, -cp::width / 2, cp::width / 2); + TH2I* h_row_col_translate = new TH2I("h_row_col_translate", "rows and cols from x and z;row;col", mNRows, 0, mNRows, mNCols, 0, mNCols); + TGraph* g_row_xLocal = new TGraph(); + g_row_xLocal->SetMarkerStyle(20); + g_row_xLocal->SetMarkerSize(0.2); TGraph* g_col_zLocal = new TGraph(); g_col_zLocal->SetMarkerStyle(20); g_col_zLocal->SetMarkerSize(0.2); - TGraph* g_raw_xLocal_translate = new TGraph(); - g_raw_xLocal_translate->SetMarkerStyle(20); - g_raw_xLocal_translate->SetMarkerSize(0.2); + TGraph* g_row_xLocal_translate = new TGraph(); + g_row_xLocal_translate->SetMarkerStyle(20); + g_row_xLocal_translate->SetMarkerSize(0.2); TGraph* g_col_zLocal_translate = new TGraph(); g_col_zLocal_translate->SetMarkerStyle(20); - SegmentationSuperAlpide seg(0); + SegmentationMosaix seg(0); int nPoint = 0; for (UInt_t i = 0; i < mNRows; ++i) { for (UInt_t j = 0; j < mNCols; ++j) { @@ -92,16 +82,16 @@ void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, int col_trans = -1; seg.detectorToLocal(i, j, xLocal, zLocal); seg.localToDetector(xLocal, zLocal, row_trans, col_trans); - g_raw_xLocal->SetPoint(nPoint, i, xLocal); + g_row_xLocal->SetPoint(nPoint, i, xLocal); g_col_zLocal->SetPoint(nPoint, j, zLocal); - g_raw_xLocal_translate->SetPoint(nPoint, xLocal, row_trans); + g_row_xLocal_translate->SetPoint(nPoint, xLocal, row_trans); g_col_zLocal_translate->SetPoint(nPoint++, zLocal, col_trans); bool pattern = ((i >= 50 && i <= 100) || (i >= 250 && i <= 350)) && ((j >= 30 && j <= 70) || (j >= 100 && j <= 120)); if (pattern) { - h_raw_col->Fill(i, j); + h_row_col->Fill(i, j); h_xLocal_zLocal->Fill(xLocal, zLocal); - h_raw_col_translate->Fill(row_trans, 
col_trans); + h_row_col_translate->Fill(row_trans, col_trans); } } } @@ -110,29 +100,30 @@ void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, // gStyle->SetPalette(kCMYK); c1->Divide(3, 1); c1->cd(1); - h_raw_col->Draw("colz"); + h_row_col->Draw("colz"); c1->cd(2); h_xLocal_zLocal->Draw("colz"); c1->cd(3); - h_raw_col_translate->Draw("colz"); + h_row_col_translate->Draw("colz"); TCanvas* c2 = new TCanvas("c2", "c2", 1600, 400); c2->Divide(4, 1); c2->cd(1); - g_raw_xLocal->SetTitle("xLocal vs raw;raw;xLocal"); - g_raw_xLocal->Draw("same ap"); + g_row_xLocal->SetTitle("xLocal vs row;row;xLocal"); + g_row_xLocal->Draw("same ap"); c2->cd(2); g_col_zLocal->SetTitle("zLocal vs col;col;zLocal"); g_col_zLocal->Draw("same ap"); c2->cd(3); - g_raw_xLocal_translate->SetTitle("raw_translate vs xLocal;xLocal;raw_translate"); - g_raw_xLocal_translate->Draw("same ap"); + g_row_xLocal_translate->SetTitle("row_translate vs xLocal;xLocal;row_translate"); + g_row_xLocal_translate->Draw("same ap"); c2->cd(4); g_col_zLocal_translate->SetTitle("col_translate vs zLocal;zLocal;col_translate"); g_col_zLocal_translate->Draw("same ap"); } if (isTestLocalToGlobal) { + o2::base::GeometryManager::loadGeometry(); namespace cp = constants::pixelarray; TH2D* h_xCurved_yCurved = new TH2D("h_xCurved_yCurved", "from flat to curved;x;y", 200, -1, 4, 200, -2, 3); TH2D* h_xFlat_yFlat = new TH2D("h_xFlat_yFlat", "from curved to flat ;x;y", 200, -1, 4, 200, -2, 3); @@ -170,11 +161,11 @@ void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, float xLocal_translate = 0; float yLocal_translate = 0; - SuperSegmentations[iLayer].detectorToLocal(row, col, xLocal, zLocal); - SuperSegmentations[iLayer].flatToCurved(xLocal, 0., xCurved, yCurved); + mMosaixSegmentations[iLayer].detectorToLocal(row, col, xLocal, zLocal); + mMosaixSegmentations[iLayer].flatToCurved(xLocal, 0., xCurved, yCurved); double posLocal[3] = {xCurved, yCurved, zLocal}; double posGlobal[3] = {0, 0, 0}; - 
SuperSegmentations[iLayer].curvedToFlat(xCurved, yCurved, xLocal_translate, yLocal_translate); + mMosaixSegmentations[iLayer].curvedToFlat(xCurved, yCurved, xLocal_translate, yLocal_translate); matrix->LocalToMaster(posLocal, posGlobal); h_xCurved_yCurved->Fill(xLocal, 0); @@ -195,8 +186,7 @@ void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, TArc* arc[3]; h_xCurved_yCurved->Draw("colz"); for (int i = 0; i < 3; i++) { - arc[i] = new TArc(-0, 0, constants::radii[i] + constants::thickness / 2., -5, 40); - arc[i]->SetLineColor(kRed); + arc[i] = new TArc(-0, 0, constants::radiiOuter[i], -5, 40); arc[i]->SetFillStyle(0); } diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegmentTrans.C b/Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegmentTrans.C similarity index 85% rename from Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegmentTrans.C rename to Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegmentTrans.C index 64937f2ad2855..1a723bd6017bb 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegmentTrans.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegmentTrans.C @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file CheckSuperAlpideSegmentTrans.C +/// \file CheckMosaixSegmentTrans.C /// \brief Simple macro to check ITS3 Alpide Trans #if !defined(__CLING__) || defined(__ROOTCLING__) @@ -26,7 +26,7 @@ #include "TStyle.h" #include "TTree.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITS3Base/SpecsV2.h" #endif @@ -37,10 +37,11 @@ constexpr float PI = 3.14159274101257324e+00f; constexpr float Rad2Deg = 180.f / PI; constexpr float Deg2Rad = 1. 
/ Rad2Deg; -constexpr auto nRows{SegmentationSuperAlpide::mNRows}; -constexpr auto nCols{SegmentationSuperAlpide::mNCols}; -constexpr auto fLength{SegmentationSuperAlpide::mLength}; -constexpr auto fWidth{SegmentationSuperAlpide::mWidth}; +constexpr auto nRows{SegmentationMosaix::NRows}; +constexpr auto nCols{SegmentationMosaix::NCols}; +constexpr auto fLength{SegmentationMosaix::Length}; +constexpr auto fWidth{SegmentationMosaix::Width}; +const std::array mMosaixSegmentations{0, 1, 2}; TH2* DrawReverseBins(TH2* h) { @@ -83,13 +84,13 @@ void DrawXAxisCol(TH1* h) newaxis->Draw(); } -void CheckSuperAlpideSegmentTrans() +void CheckMosaixSegmentTrans() { gStyle->SetOptStat(1111111); for (int iLayer{0}; iLayer < 3; ++iLayer) { - float r_inner = constants::radii[iLayer] - constants::thickness / 2.; - float r_outer = constants::radii[iLayer] + constants::thickness / 2.; + float r_inner = constants::radiiInner[iLayer]; + float r_outer = constants::radiiOuter[iLayer]; float phiReadout_inner = constants::tile::readout::width / r_inner * Rad2Deg; float phiReadout_outer = @@ -140,10 +141,10 @@ void CheckSuperAlpideSegmentTrans() g_arc_inner->AddPoint(x_inner, y_inner); g_arc_outer->AddPoint(x_outer, y_outer); // Test Segmentation - SuperSegmentations[iLayer].curvedToFlat(x_inner, y_inner, x_inner_flat, y_inner_flat); - SuperSegmentations[iLayer].flatToCurved(x_inner_flat, y_inner_flat, x_inner_curved, y_inner_curved); - SuperSegmentations[iLayer].curvedToFlat(x_outer, y_outer, x_outer_flat, y_outer_flat); - SuperSegmentations[iLayer].flatToCurved(x_outer_flat, y_outer_flat, x_outer_curved, y_outer_curved); + mMosaixSegmentations[iLayer].curvedToFlat(x_inner, y_inner, x_inner_flat, y_inner_flat); + mMosaixSegmentations[iLayer].flatToCurved(x_inner_flat, y_inner_flat, x_inner_curved, y_inner_curved); + mMosaixSegmentations[iLayer].curvedToFlat(x_outer, y_outer, x_outer_flat, y_outer_flat); + mMosaixSegmentations[iLayer].flatToCurved(x_outer_flat, y_outer_flat, x_outer_curved, 
y_outer_curved); g_arc_inner_flat->AddPoint(x_inner_flat, y_inner_flat); g_arc_outer_flat->AddPoint(x_outer_flat, y_outer_flat); h_f2c_res->Fill(x_inner - x_inner_curved, y_inner - y_inner_curved); @@ -201,15 +202,12 @@ void CheckSuperAlpideSegmentTrans() for (int iCol{0}; iCol < nCols; ++iCol) { float xRow{0}, zCol{0}; int iiRow{0}, iiCol{0}; - auto v1 = - SuperSegmentations[iLayer].detectorToLocal(iRow, iCol, xRow, zCol); - auto v2 = SuperSegmentations[iLayer].localToDetector(xRow, zCol, iiRow, - iiCol); - // Info("L2D", - // "iRow=%d, iCol=%d --d2l(%s)--> xRow=%f, zCol=%f --l2d(%s)--> " - // "iiRow=%d, iiCol=%d", - // iRow, iCol, v1 ? "good" : "bad", xRow, zCol, v2 ? "good" : - // "bad", iiRow, iiCol); + auto v1 = mMosaixSegmentations[iLayer].detectorToLocal(iRow, iCol, xRow, zCol); + auto v2 = mMosaixSegmentations[iLayer].localToDetector(xRow, zCol, iiRow, iiCol); + Info("L2D", + "iRow=%d, iCol=%d --d2l(%s)--> xRow=%f, zCol=%f --l2d(%s)--> " + "iiRow=%d, iiCol=%d", + iRow, iCol, v1 ? "good" : "bad", xRow, zCol, v2 ? 
"good" : "bad", iiRow, iiCol); if (!v1 || !v2) { Error("LOOP", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Layer %d", iLayer); return; diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckTileNumbering.C b/Detectors/Upgrades/ITS3/macros/test/CheckTileNumbering.C index 3a01960b1859d..220b1d39ad42b 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckTileNumbering.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckTileNumbering.C @@ -25,7 +25,7 @@ #include "ITSBase/GeometryTGeo.h" #include "ITS3Base/SpecsV2.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "MathUtils/Cartesian.h" #include "MathUtils/Utils.h" #include "DataFormatsITSMFT/NoiseMap.h" @@ -102,6 +102,8 @@ void CheckTileNumbering(const std::string& inputGeom = "", const std::string& de Int_t colors[NRGBs] = {kWhite, kRed, kGray}; TColor::SetPalette(NRGBs, colors, 1.0); + std::array mMosaixSegmentations{0, 1, 2}; + const float phiOffsetL0 = std::asin(o2::its3::constants::equatorialGap / 2.f / o2::its3::constants::radii[0]); const float phiOffsetL1 = std::asin(o2::its3::constants::equatorialGap / 2.f / o2::its3::constants::radii[1]); const float phiOffsetL2 = std::asin(o2::its3::constants::equatorialGap / 2.f / o2::its3::constants::radii[2]); @@ -142,7 +144,7 @@ void CheckTileNumbering(const std::string& inputGeom = "", const std::string& de for (unsigned int iDet{0}; iDet <= o2::its3::constants::detID::l2IDEnd; ++iDet) { int sensorID = o2::its3::constants::detID::getSensorID(iDet); int layerID = o2::its3::constants::detID::getDetID2Layer(iDet); - o2::its3::SuperSegmentations[layerID].flatToCurved(xFlat, 0., x, y); + mMosaixSegmentations[layerID].flatToCurved(xFlat, 0., x, y); o2::math_utils::Point3D locC{x, y, z}; auto gloC = gman->getMatrixL2G(iDet)(locC); float phi = o2::math_utils::to02Pi(std::atan2(gloC.Y(), gloC.X())); diff --git a/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigits.C b/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigits.C 
index f151de72c8ac1..c124481cc6f76 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigits.C +++ b/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigits.C @@ -31,7 +31,7 @@ #include "DataFormatsITSMFT/ROFRecord.h" #include "DetectorsCommonDataFormats/DetID.h" #include "DetectorsCommonDataFormats/DetectorNameConf.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITS3Base/SpecsV2.h" #include "ITS3Reconstruction/TopologyDictionary.h" #include "ITSBase/GeometryTGeo.h" @@ -86,7 +86,6 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", using namespace o2::base; using o2::itsmft::Hit; - using SuperSegmentation = o2::its3::SegmentationSuperAlpide; using Segmentation = o2::itsmft::SegmentationAlpide; using o2::itsmft::CompClusterExt; using ROFRec = o2::itsmft::ROFRecord; @@ -97,6 +96,8 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", std::vector hitVecPool; std::vector mc2hitVec; + std::array mMosaixSegmentations{0, 1, 2}; + // Geometry o2::base::GeometryManager::loadGeometry(inputGeom); auto gman = o2::its::GeometryTGeo::Instance(); @@ -124,9 +125,9 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", TFile fileC(clusfile.data()); auto* clusTree = dynamic_cast(fileC.Get("o2sim")); std::vector* clusArr = nullptr; - clusTree->SetBranchAddress("IT3ClusterComp", &clusArr); + clusTree->SetBranchAddress("ITSClusterComp", &clusArr); std::vector* patternsPtr = nullptr; - auto pattBranch = clusTree->GetBranch("IT3ClusterPatt"); + auto pattBranch = clusTree->GetBranch("ITSClusterPatt"); if (pattBranch != nullptr) { pattBranch->SetAddress(&patternsPtr); } @@ -144,14 +145,14 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", // ROFrecords std::vector rofRecVec, *rofRecVecP = &rofRecVec; - clusTree->SetBranchAddress("IT3ClustersROF", &rofRecVecP); + clusTree->SetBranchAddress("ITSClustersROF", &rofRecVecP); // 
Cluster MC labels o2::dataformats::MCTruthContainer* clusLabArr = nullptr; std::vector mc2rofVec, *mc2rofVecP = &mc2rofVec; - if ((hitTree != nullptr) && (clusTree->GetBranch("IT3ClusterMCTruth") != nullptr)) { - clusTree->SetBranchAddress("IT3ClusterMCTruth", &clusLabArr); - clusTree->SetBranchAddress("IT3ClustersMC2ROF", &mc2rofVecP); + if ((hitTree != nullptr) && (clusTree->GetBranch("ITSClusterMCTruth") != nullptr)) { + clusTree->SetBranchAddress("ITSClusterMCTruth", &clusLabArr); + clusTree->SetBranchAddress("ITSClustersMC2ROF", &mc2rofVecP); } clusTree->GetEntry(0); @@ -188,7 +189,7 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", std::vector data(nChips); for (int iChip{0}; iChip < nChips; ++iChip) { auto& dat = data[iChip]; - int col{o2::its3::SegmentationSuperAlpide::mNCols}, row{o2::its3::SegmentationSuperAlpide::mNRows}; + int col{o2::its3::SegmentationMosaix::NCols}, row{o2::its3::SegmentationMosaix::NRows}; if (!o2::its3::constants::detID::isDetITS3(iChip)) { col = o2::itsmft::SegmentationAlpide::NCols; row = o2::itsmft::SegmentationAlpide::NRows; @@ -259,7 +260,7 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", const auto pattID = cluster.getPatternID(); const auto isIB = o2::its3::constants::detID::isDetITS3(chipID); const auto layer = gman->getLayer(chipID); - if (pattID == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID)) { + if (pattID == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID, isIB)) { continue; } const auto& lab = (clusLabArr->getLabels(clEntry))[0]; @@ -282,9 +283,9 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", o2::math_utils::Point3D locHMiddle; if (isIB) { float xFlat{0.}, yFlat{0.}; - o2::its3::SuperSegmentations[layer].curvedToFlat(locHEnd.X(), locHEnd.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(locHEnd.X(), locHEnd.Y(), xFlat, yFlat); locHEnd.SetXYZ(xFlat, yFlat, locHEnd.Z()); - 
o2::its3::SuperSegmentations[layer].curvedToFlat(locHStart.X(), locHStart.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(locHStart.X(), locHStart.Y(), xFlat, yFlat); locHStart.SetXYZ(xFlat, yFlat, locHStart.Z()); } locHMiddle.SetXYZ(0.5f * (locHEnd.X() + locHStart.X()), 0.5f * (locHEnd.Y() + locHStart.Y()), 0.5f * (locHEnd.Z() + locHStart.Z())); @@ -292,10 +293,10 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", int rowHS, colHS, rowHM, colHM, rowHE, colHE, colC, rowC; bool v1, v2, v3, v4; if (isIB) { - v1 = o2::its3::SuperSegmentations[layer].localToDetector(locHStart.X(), locHStart.Z(), rowHS, colHS); - v2 = o2::its3::SuperSegmentations[layer].localToDetector(locHMiddle.X(), locHMiddle.Z(), rowHM, colHM); - v3 = o2::its3::SuperSegmentations[layer].localToDetector(locHEnd.X(), locHEnd.Z(), rowHE, colHE); - v4 = o2::its3::SuperSegmentations[layer].localToDetector(locC.X(), locC.Z(), rowC, colC); + v1 = mMosaixSegmentations[layer].localToDetector(locHStart.X(), locHStart.Z(), rowHS, colHS); + v2 = mMosaixSegmentations[layer].localToDetector(locHMiddle.X(), locHMiddle.Z(), rowHM, colHM); + v3 = mMosaixSegmentations[layer].localToDetector(locHEnd.X(), locHEnd.Z(), rowHE, colHE); + v4 = mMosaixSegmentations[layer].localToDetector(locC.X(), locC.Z(), rowC, colC); } else { v1 = o2::itsmft::SegmentationAlpide::localToDetector(locHStart.X(), locHStart.Z(), rowHS, colHS); v2 = o2::itsmft::SegmentationAlpide::localToDetector(locHMiddle.X(), locHMiddle.Z(), rowHM, colHM); @@ -315,7 +316,7 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", data[chipID].cog->AddPoint(colC, rowC); constexpr float delta = 1e-2; - const auto& patt = dict.getPattern(cluster.getPatternID()); + const auto& patt = dict.getPattern(cluster.getPatternID(), isIB); auto box = new TBox( cluster.getCol() - delta - 0.5, cluster.getRow() - delta - 0.5, @@ -338,8 +339,8 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", } 
auto& dat = data[iChip]; gFile->cd(); - /* auto path = gman->getMatrixPath(iChip); */ - TString path; // TODO wrong use above + auto path = gman->getMatrixPath(iChip); + /*TString path; // TODO wrong use above*/ const std::string cpath{path.Data() + 39, path.Data() + path.Length()}; const std::filesystem::path p{cpath}; if (oFile->mkdir(p.parent_path().c_str(), "", true) == nullptr) { diff --git a/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C b/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C index d8783ba7c8fb9..cc241afb3357a 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C @@ -34,7 +34,7 @@ #include "DetectorsCommonDataFormats/DetID.h" #include "ITSBase/GeometryTGeo.h" #include "ITSMFTBase/SegmentationAlpide.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "DataFormatsITSMFT/CompCluster.h" #include "DataFormatsITSMFT/ClusterTopology.h" #include "ITS3Reconstruction/TopologyDictionary.h" @@ -60,14 +60,13 @@ void CreateDictionariesITS3(bool saveDeltas = false, std::string collContextfile = "collisioncontext.root", std::string inputGeom = "", float checkOutliers = 2., // reject outliers (MC dX or dZ exceeds row/col span by a factor above the threshold) - float minPtMC = 0.01) // account only MC hits with pT above threshold + float minPtMC = 0.1) // account only MC hits with pT above threshold { const int QEDSourceID = 99; // Clusters from this MC source correspond to QED electrons using namespace o2::base; using namespace o2::its; - using o2::its3::SegmentationSuperAlpide; using Segmentation = o2::itsmft::SegmentationAlpide; using o2::its3::BuildTopologyDictionary; using o2::itsmft::ClusterTopology; @@ -82,12 +81,14 @@ void CreateDictionariesITS3(bool saveDeltas = false, std::vector hitVecPool; std::vector mc2hitVec; o2::its3::TopologyDictionary clusDictOld; + std::array mMosaixSegmentations{0, 1, 
2}; if (!clusDictFile.empty()) { clusDictOld.readFromFile(clusDictFile); - LOGP(info, "Loaded external cluster dictionary with {} entries from {}", clusDictOld.getSize(), clusDictFile); + LOGP(info, "Loaded external cluster dictionary with {} IB/{} OBentries from {}", clusDictOld.getSize(true), clusDictOld.getSize(false), clusDictFile); } - ULong_t cOk{0}, cOutliers{0}, cFailedMC{0}; + ULong_t cOkIB{0}, cOutliersIB{0}, cFailedMCIB{0}; + ULong_t cOkOB{0}, cOutliersOB{0}, cFailedMCOB{0}; TFile* fout = nullptr; TNtuple* nt = nullptr; @@ -233,17 +234,18 @@ void CreateDictionariesITS3(bool saveDeltas = false, const auto& cluster = (*clusArr)[clEntry]; o2::itsmft::ClusterPattern pattern; + bool ib = o2::its3::constants::detID::isDetITS3(cluster.getChipID()); if (cluster.getPatternID() != CompCluster::InvalidPatternID) { - if (clusDictOld.getSize() == 0) { + if (clusDictOld.getSize(ib) == 0) { LOG(error) << "Encountered patternID = " << cluster.getPatternID() << " != " << CompCluster::InvalidPatternID; LOG(error) << "Clusters have already been generated with a dictionary which was not provided"; return; } - if (clusDictOld.isGroup(cluster.getPatternID())) { + if (clusDictOld.isGroup(cluster.getPatternID(), ib)) { pattern.acquirePattern(pattIdx); } else { - pattern = clusDictOld.getPattern(cluster.getPatternID()); + pattern = clusDictOld.getPattern(cluster.getPatternID(), ib); } } else { pattern.acquirePattern(pattIdx); @@ -270,44 +272,43 @@ void CreateDictionariesITS3(bool saveDeltas = false, o2::math_utils::Vector3D xyzLocM; xyzLocM.SetCoordinates(0.5f * (xyzLocE.X() + xyzLocS.X()), 0.5f * (xyzLocE.Y() + xyzLocS.Y()), 0.5f * (xyzLocE.Z() + xyzLocS.Z())); auto locC = o2::its3::TopologyDictionary::getClusterCoordinates(cluster, pattern, false); - bool isIB = o2::its3::constants::detID::isDetITS3(chipID); int layer = gman->getLayer(chipID); - if (isIB) { + if (ib) { float xFlat{0.}, yFlat{0.}; - o2::its3::SuperSegmentations[layer].curvedToFlat(xyzLocM.X(), xyzLocM.Y(), 
xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(xyzLocM.X(), xyzLocM.Y(), xFlat, yFlat); xyzLocM.SetCoordinates(xFlat, yFlat, xyzLocM.Z()); - o2::its3::SuperSegmentations[layer].curvedToFlat(locC.X(), locC.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(locC.X(), locC.Y(), xFlat, yFlat); locC.SetCoordinates(xFlat, yFlat, locC.Z()); } dX = xyzLocM.X() - locC.X(); dZ = xyzLocM.Z() - locC.Z(); - dX /= (isIB) ? o2::its3::SegmentationSuperAlpide::mPitchRow : o2::itsmft::SegmentationAlpide::PitchRow; - dZ /= (isIB) ? o2::its3::SegmentationSuperAlpide::mPitchCol : o2::itsmft::SegmentationAlpide::PitchCol; + dX /= (ib) ? o2::its3::SegmentationMosaix::PitchRow : o2::itsmft::SegmentationAlpide::PitchRow; + dZ /= (ib) ? o2::its3::SegmentationMosaix::PitchCol : o2::itsmft::SegmentationAlpide::PitchCol; if (saveDeltas) { nt->Fill(topology.getHash(), dX, dZ); } if (checkOutliers > 0.) { if (bool bX = std::abs(dX) > topology.getRowSpan() * checkOutliers, bZ = std::abs(dZ) > topology.getColumnSpan() * checkOutliers; bX || bZ) { // ignore outlier - ++cOutliers; + (ib) ? ++cOutliersIB : ++cOutliersOB; LOGP(debug, "Ignored Value dX={} > {} * {} -> {}", dX, topology.getRowSpan(), checkOutliers, bX); LOGP(debug, "Ignored Value dZ={} > {} * {} -> {}", dZ, topology.getColumnSpan(), checkOutliers, bZ); dX = dZ = BuildTopologyDictionary::IgnoreVal; } else { - ++cOk; + (ib) ? ++cOkIB : ++cOkOB; } } } } else { /* LOGP(info, " Failed to find MC hit entry for Tr: {} chipID: {}", trID, chipID); */ /* lab.print(); */ - ++cFailedMC; + (ib) ? 
++cFailedMCIB : ++cFailedMCOB; } - signalDictionary.accountTopology(topology, dX, dZ); + signalDictionary.accountTopology(topology, ib, dX, dZ); } else { - noiseDictionary.accountTopology(topology, dX, dZ); + noiseDictionary.accountTopology(topology, ib, dX, dZ); } } - completeDictionary.accountTopology(topology, dX, dZ); + completeDictionary.accountTopology(topology, ib, dX, dZ); } // clean MC cache for events which are not needed anymore @@ -323,12 +324,14 @@ void CreateDictionariesITS3(bool saveDeltas = false, } } - LOGP(info, "Clusters: {} okay (failed MCHit2Clus {}); outliers {}", cOk, cFailedMC, cOutliers); + LOGP(info, "IB Clusters: {} okay (failed MCHit2Clus {}); outliers {}", cOkIB, cFailedMCIB, cOutliersIB); + LOGP(info, "OB Clusters: {} okay (failed MCHit2Clus {}); outliers {}", cOkOB, cFailedMCOB, cOutliersOB); auto dID = o2::detectors::DetID::IT3; LOGP(info, "Complete Dictionary:"); - completeDictionary.setThreshold(probThreshold); + completeDictionary.setThreshold(probThreshold, true); + completeDictionary.setThreshold(probThreshold, false); completeDictionary.groupRareTopologies(); completeDictionary.printDictionaryBinary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "")); completeDictionary.printDictionary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "", "txt")); @@ -336,24 +339,34 @@ void CreateDictionariesITS3(bool saveDeltas = false, TFile histogramOutput("histograms.root", "recreate"); TCanvas* cComplete = new TCanvas("cComplete", "Distribution of all the topologies"); - cComplete->cd(); - cComplete->SetLogy(); - TH1F* hComplete = completeDictionary.getDictionary().getTopologyDistribution("hComplete"); - hComplete->SetDirectory(nullptr); - hComplete->Draw("hist"); - hComplete->Write(); + cComplete->Divide(2, 1); + cComplete->cd(1); + TH1F* hCompleteIB = completeDictionary.getDictionary().getTopologyDistribution("hCompleteInnerBarrel", true); + hCompleteIB->SetDirectory(nullptr); + 
hCompleteIB->Draw("hist"); + gPad->SetLogy(); + cComplete->cd(2); + TH1F* hCompleteOB = completeDictionary.getDictionary().getTopologyDistribution("hCompleteOuterBarrel", false); + hCompleteOB->SetDirectory(nullptr); + hCompleteOB->Draw("hist"); + gPad->SetLogy(); + histogramOutput.cd(); + hCompleteIB->Write(); + hCompleteOB->Write(); cComplete->Write(); if (clusLabArr) { LOGP(info, "Noise Dictionary:"); - noiseDictionary.setThreshold(0.0001); + noiseDictionary.setThreshold(0.0001, true); + noiseDictionary.setThreshold(0.0001, false); noiseDictionary.groupRareTopologies(); noiseDictionary.printDictionaryBinary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "noiseClusTopo")); noiseDictionary.printDictionary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "noiseClusTopo", "txt")); noiseDictionary.saveDictionaryRoot(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "noiseClusTopo", "root")); LOGP(info, "Signal Dictionary:"); - signalDictionary.setThreshold(0.0001); + signalDictionary.setThreshold(0.0001, true); + signalDictionary.setThreshold(0.0001, false); signalDictionary.groupRareTopologies(); signalDictionary.printDictionaryBinary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "signal")); signalDictionary.printDictionary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "signal", "txt")); @@ -361,26 +374,42 @@ void CreateDictionariesITS3(bool saveDeltas = false, LOGP(info, "Plotting Channels"); auto cNoise = new TCanvas("cNoise", "Distribution of noise topologies"); - cNoise->cd(); - cNoise->SetLogy(); - auto hNoise = noiseDictionary.getDictionary().getTopologyDistribution("hNoise"); - hNoise->SetDirectory(nullptr); - hNoise->Draw("hist"); + cNoise->Divide(2, 1); + cNoise->cd(1); + auto hNoiseIB = noiseDictionary.getDictionary().getTopologyDistribution("hNoiseInnerBarrel", true); + hNoiseIB->SetDirectory(nullptr); + hNoiseIB->Draw("hist"); + gPad->SetLogy(); + 
cNoise->cd(2); + auto hNoiseOB = noiseDictionary.getDictionary().getTopologyDistribution("hNoiseOuterBarrel", false); + hNoiseOB->SetDirectory(nullptr); + hNoiseOB->Draw("hist"); + gPad->SetLogy(); histogramOutput.cd(); - hNoise->Write(); + hNoiseIB->Write(); + hNoiseOB->Write(); cNoise->Write(); + auto cSignal = new TCanvas("cSignal", "cSignal"); - cSignal->cd(); + cSignal->Divide(2, 1); + cSignal->cd(1); + auto hSignalIB = signalDictionary.getDictionary().getTopologyDistribution("hSignalInnerBarrel", true); + hSignalIB->SetDirectory(nullptr); + hSignalIB->Draw("hist"); + gPad->SetLogy(); + cSignal->cd(2); cSignal->SetLogy(); - auto hSignal = signalDictionary.getDictionary().getTopologyDistribution("hSignal"); - hSignal->SetDirectory(nullptr); - hSignal->Draw("hist"); + auto hSignalOB = signalDictionary.getDictionary().getTopologyDistribution("hSignalOuterBarrel", false); + hSignalOB->SetDirectory(nullptr); + hSignalOB->Draw("hist"); + gPad->SetLogy(); histogramOutput.cd(); - hSignal->Write(); + hSignalIB->Write(); + hSignalOB->Write(); cSignal->Write(); - sw.Stop(); - sw.Print(); } + sw.Stop(); + sw.Print(); if (saveDeltas) { fout->cd(); nt->Write(); diff --git a/Detectors/Upgrades/ITS3/macros/test/TestSensorGeometry.C b/Detectors/Upgrades/ITS3/macros/test/TestSensorGeometry.C index 1a0ec73e34f31..4b54bbced2929 100644 --- a/Detectors/Upgrades/ITS3/macros/test/TestSensorGeometry.C +++ b/Detectors/Upgrades/ITS3/macros/test/TestSensorGeometry.C @@ -21,7 +21,7 @@ #include "TList.h" #endif -void TestSensorGeometry(bool checkFull = false) +void TestSensorGeometry(bool draw = false, bool checkFull = false) { gGeoManager = new TGeoManager("simple", "Simple geometry"); TGeoMaterial* matVacuum = new TGeoMaterial("Vacuum", 0, 0, 0); @@ -30,8 +30,7 @@ void TestSensorGeometry(bool checkFull = false) auto top = gGeoManager->MakeBox("TOP", Vacuum, 270., 270., 120.); gGeoManager->SetTopVolume(top); - o2::its3::ITS3Layer layer0{0, top, nullptr, - 
o2::its3::ITS3Layer::BuildLevel::kLayer, true}; + o2::its3::ITS3Layer layer0{2, top, nullptr, o2::its3::ITS3Layer::BuildLevel::kLayer, true}; // Print available medias TIter next{gGeoManager->GetListOfMedia()}; @@ -42,13 +41,17 @@ void TestSensorGeometry(bool checkFull = false) gGeoManager->CloseGeometry(); gGeoManager->SetVisLevel(99); + if (draw) { + gGeoManager->Draw("ogl"); + } + if (checkFull) { gGeoManager->CheckGeometryFull(); - } - gGeoManager->CheckOverlaps(0.0001); - TIter nextOverlap{gGeoManager->GetListOfOverlaps()}; - while ((obj = (TObject*)nextOverlap())) { - LOGP(info, "Overlap in {}", obj->GetName()); + gGeoManager->CheckOverlaps(0.00001); + TIter nextOverlap{gGeoManager->GetListOfOverlaps()}; + while ((obj = (TObject*)nextOverlap())) { + LOGP(info, "Overlap in {}", obj->GetName()); + } } std::unique_ptr f{TFile::Open("geo.root", "RECREATE")}; diff --git a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/BuildTopologyDictionary.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/BuildTopologyDictionary.h index 7df603bb29fb2..662c58aeb2cd8 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/BuildTopologyDictionary.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/BuildTopologyDictionary.h @@ -24,31 +24,47 @@ namespace o2::its3 class BuildTopologyDictionary { + using TopoInfo = std::unordered_map; + using TopoStat = std::map; + using TopoFreq = std::vector>; + public: static constexpr float IgnoreVal = 999.; - void accountTopology(const itsmft::ClusterTopology& cluster, float dX = IgnoreVal, float dZ = IgnoreVal); - void setNCommon(unsigned int nCommon); // set number of common topologies - void setThreshold(double thr); - void setThresholdCumulative(double cumulative); // Considering the integral + void accountTopology(const itsmft::ClusterTopology& cluster, bool IB, float dX = IgnoreVal, float dZ = IgnoreVal); + void setNCommon(unsigned int nCommon, bool IB); // 
set number of common topologies + void setThreshold(double thr, bool IB); + void setThresholdCumulative(double cumulative, bool IB); // Considering the integral void groupRareTopologies(); - friend std::ostream& operator<<(std::ostream& os, const BuildTopologyDictionary& BD); void printDictionary(const std::string& fname); void printDictionaryBinary(const std::string& fname); void saveDictionaryRoot(const std::string& fname); - unsigned int getTotClusters() const { return mTotClusters; } - unsigned int getNotInGroups() const { return mNCommonTopologies; } - TopologyDictionary getDictionary() const { return mDictionary; } + [[nodiscard]] unsigned int getTotClusters(bool IB) const { return (IB) ? mTotClustersIB : mTotClustersOB; } + [[nodiscard]] unsigned int getNotInGroups(bool IB) const { return (IB) ? mNCommonTopologiesIB : mNCommonTopologiesOB; } + [[nodiscard]] const TopologyDictionary& getDictionary() const { return mDictionary; } + + friend std::ostream& operator<<(std::ostream& os, const BuildTopologyDictionary& BD); private: - TopologyDictionary mDictionary; ///< Dictionary of topologies - std::map mTopologyMap; //! Temporary map of type - std::vector> mTopologyFrequency; //! 
, needed to define threshold - unsigned int mTotClusters{0}; - unsigned int mNCommonTopologies{0}; - double mFrequencyThreshold{0.}; - - std::unordered_map mMapInfo; + void accountTopologyImpl(const itsmft::ClusterTopology& cluster, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ntot, float sigmaX, float sigmaZ, float dX, float dZ); + void setNCommonImpl(unsigned int ncom, TopoFreq& tfreq, TopoStat& tstat, unsigned int& ncommon, unsigned int ntot); + void setThresholdImpl(double thr, TopoFreq& tfreq, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ncommon, double& freqthres, unsigned int ntot); + void setThresholdCumulativeImpl(double cumulative, TopoFreq& tfreq, unsigned int& ncommon, double& freqthres, unsigned int ntot); + void groupRareTopologiesImpl(TopoFreq& tfreq, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ncommon, double& freqthres, TopologyDictionaryData& data, unsigned int ntot); + + TopologyDictionary mDictionary; ///< Dictionary of topologies + unsigned int mTotClustersIB{0}; + unsigned int mTotClustersOB{0}; + unsigned int mNCommonTopologiesIB{0}; + unsigned int mNCommonTopologiesOB{0}; + double mFrequencyThresholdIB{0.}; + double mFrequencyThresholdOB{0.}; + TopoInfo mMapInfoIB; + TopoInfo mMapInfoOB; + TopoStat mTopologyMapIB; //! IB Temporary map of type + TopoStat mTopologyMapOB; //! OB Temporary map of type + TopoFreq mTopologyFrequencyIB; //! IB , needed to define threshold + TopoFreq mTopologyFrequencyOB; //! 
OB , needed to define threshold ClassDefNV(BuildTopologyDictionary, 3); }; diff --git a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/Clusterer.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/Clusterer.h index 20acf07d4f547..a81db09217e9b 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/Clusterer.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/Clusterer.h @@ -207,7 +207,7 @@ class Clusterer template static void streamCluster(const std::vector& pixbuf, const std::array* lblBuff, const BBox& bbox, const its3::LookUp& pattIdConverter, - VCLUS* compClusPtr, VPAT* patternsPtr, MCTruth* labelsClusPtr, int nlab, bool isHuge = false); + VCLUS* compClusPtr, VPAT* patternsPtr, MCTruth* labelsClusPtr, int nlab, bool isIB, bool isHuge = false); bool isContinuousReadOut() const { return mContinuousReadout; } void setContinuousReadOut(bool v) { mContinuousReadout = v; } @@ -230,7 +230,7 @@ class Clusterer ///< load the dictionary of cluster topologies void setDictionary(const its3::TopologyDictionary* dict) { - LOGP(info, "Setting TopologyDictionary with size={}", dict->getSize()); + LOGP(info, "Setting TopologyDictionary with IB size={} & OB size={}", dict->getSize(true), dict->getSize(false)); mPattIdConverter.setDictionary(dict); // dict->print(); } @@ -274,7 +274,7 @@ class Clusterer template void Clusterer::streamCluster(const std::vector& pixbuf, const std::array* lblBuff, const Clusterer::BBox& bbox, const its3::LookUp& pattIdConverter, - VCLUS* compClusPtr, VPAT* patternsPtr, MCTruth* labelsClusPtr, int nlab, bool isHuge) + VCLUS* compClusPtr, VPAT* patternsPtr, MCTruth* labelsClusPtr, int nlab, bool isIB, bool isHuge) { if (labelsClusPtr && lblBuff) { // MC labels were requested auto cnt = compClusPtr->size(); @@ -291,10 +291,10 @@ void Clusterer::streamCluster(const std::vector& pixbuf, const std::a int nbits = ir * colSpanW + ic; patt[nbits >> 3] |= (0x1 << (7 - (nbits % 
8))); } - uint16_t pattID = (isHuge || pattIdConverter.size() == 0) ? CompCluster::InvalidPatternID : pattIdConverter.findGroupID(rowSpanW, colSpanW, patt.data()); + uint16_t pattID = (isHuge || pattIdConverter.size(isIB) == 0) ? CompCluster::InvalidPatternID : pattIdConverter.findGroupID(rowSpanW, colSpanW, isIB, patt.data()); uint16_t row = bbox.rowMin, col = bbox.colMin; LOGP(debug, "PattID: findGroupID({},{},{})={}", row, col, patt[0], pattID); - if (pattID == CompCluster::InvalidPatternID || pattIdConverter.isGroup(pattID)) { + if (pattID == CompCluster::InvalidPatternID || pattIdConverter.isGroup(pattID, isIB)) { if (pattID != CompCluster::InvalidPatternID) { // For groupped topologies, the reference pixel is the COG pixel float xCOG = 0., zCOG = 0.; diff --git a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/IOUtils.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/IOUtils.h index 2407344aa0193..b9e7fd0f6ec39 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/IOUtils.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/IOUtils.h @@ -16,14 +16,13 @@ #include "ITS3Reconstruction/TopologyDictionary.h" #include "ITStracking/TimeFrame.h" #include "ITStracking/IOUtils.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITS3Base/SpecsV2.h" namespace o2::its3::ioutils { -using SSAlpide = o2::its3::SegmentationSuperAlpide; -constexpr float DefClusErrorRow = o2::its3::SegmentationSuperAlpide::mPitchRow * 0.5; -constexpr float DefClusErrorCol = o2::its3::SegmentationSuperAlpide::mPitchCol * 0.5; +constexpr float DefClusErrorRow = o2::its3::SegmentationMosaix::PitchRow * 0.5; +constexpr float DefClusErrorCol = o2::its3::SegmentationMosaix::PitchCol * 0.5; constexpr float DefClusError2Row = DefClusErrorRow * DefClusErrorRow; constexpr float DefClusError2Col = DefClusErrorCol * DefClusErrorCol; @@ -31,13 +30,14 @@ template 
o2::math_utils::Point3D extractClusterData(const itsmft::CompClusterExt& c, iterator& iter, const its3::TopologyDictionary* dict, T& sig2y, T& sig2z) { auto pattID = c.getPatternID(); + auto ib = constants::detID::isDetITS3(c.getSensorID()); // Dummy COG errors (about half pixel size) - sig2y = (constants::detID::isDetITS3(c.getSensorID())) ? DefClusError2Row : o2::its::ioutils::DefClusError2Row; - sig2z = (constants::detID::isDetITS3(c.getSensorID())) ? DefClusError2Col : o2::its::ioutils::DefClusError2Col; + sig2y = (ib) ? DefClusError2Row : o2::its::ioutils::DefClusError2Row; + sig2z = (ib) ? DefClusError2Col : o2::its::ioutils::DefClusError2Col; if (pattID != itsmft::CompCluster::InvalidPatternID) { - sig2y = dict->getErr2X(pattID) * sig2y; // Error is given in detector coordinates - sig2z = dict->getErr2Z(pattID) * sig2z; - if (!dict->isGroup(pattID)) { + sig2y = dict->getErr2X(pattID, ib); + sig2z = dict->getErr2Z(pattID, ib); + if (!dict->isGroup(pattID, ib)) { return dict->getClusterCoordinates(c); } else { o2::itsmft::ClusterPattern patt(iter); @@ -53,13 +53,14 @@ template o2::math_utils::Point3D extractClusterData(const itsmft::CompClusterExt& c, iterator& iter, const its3::TopologyDictionary* dict, T& sig2y, T& sig2z, uint8_t& cls) { auto pattID = c.getPatternID(); + auto ib = constants::detID::isDetITS3(c.getSensorID()); auto iterC = iter; unsigned int clusterSize{999}; - if (pattID == itsmft::CompCluster::InvalidPatternID || dict->isGroup(pattID)) { + if (pattID == itsmft::CompCluster::InvalidPatternID || dict->isGroup(pattID, ib)) { o2::itsmft::ClusterPattern patt(iterC); clusterSize = patt.getNPixels(); } else { - clusterSize = dict->getNpixels(pattID); + clusterSize = dict->getNpixels(pattID, ib); } cls = static_cast(std::clamp(clusterSize, static_cast(std::numeric_limits::min()), static_cast(std::numeric_limits::max()))); return extractClusterData(c, iter, dict, sig2y, sig2z); diff --git 
a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/LookUp.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/LookUp.h index 0fbecb41393ff..809a129a0debf 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/LookUp.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/LookUp.h @@ -21,7 +21,6 @@ #ifndef ALICEO2_ITS3_LOOKUP_H #define ALICEO2_ITS3_LOOKUP_H -#include "DataFormatsITSMFT/ClusterTopology.h" #include "ITS3Reconstruction/TopologyDictionary.h" namespace o2::its3 @@ -32,20 +31,21 @@ class LookUp LookUp() = default; LookUp(std::string fileName); static int groupFinder(int nRow, int nCol); - int findGroupID(int nRow, int nCol, const unsigned char patt[itsmft::ClusterPattern::MaxPatternBytes]) const; - int getTopologiesOverThreshold() const { return mTopologiesOverThreshold; } + int findGroupID(int nRow, int nCol, bool IB, const unsigned char patt[itsmft::ClusterPattern::MaxPatternBytes]) const; + int getTopologiesOverThreshold(bool IB) const { return (IB) ? 
mTopologiesOverThresholdIB : mTopologiesOverThresholdOB; } void loadDictionary(std::string fileName); void setDictionary(const TopologyDictionary* dict); - bool isGroup(int id) const { return mDictionary.isGroup(id); } - int size() const { return mDictionary.getSize(); } - auto getPattern(int id) const { return mDictionary.getPattern(id); } - auto getDictionaty() const { return mDictionary; } + auto getDictionary() const { return mDictionary; } + bool isGroup(int id, bool IB) const { return mDictionary.isGroup(id, IB); } + int size(bool IB) const { return mDictionary.getSize(IB); } + auto getPattern(int id, bool IB) const { return mDictionary.getPattern(id, IB); } private: - TopologyDictionary mDictionary{}; - int mTopologiesOverThreshold{0}; + TopologyDictionary mDictionary; + int mTopologiesOverThresholdIB{0}; + int mTopologiesOverThresholdOB{0}; - ClassDefNV(LookUp, 2); + ClassDefNV(LookUp, 3); }; } // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/TopologyDictionary.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/TopologyDictionary.h index a11131ed9f61f..d5f5721170aa7 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/TopologyDictionary.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/TopologyDictionary.h @@ -24,6 +24,18 @@ namespace o2::its3 class BuildTopologyDictionary; class LookUp; +struct TopologyDictionaryData { + static constexpr int STopoSize{(8 * 255) + 1}; + std::array mSmallTopologiesLUT{}; ///< Look-Up Table for the topologies with 1-byte linearised matrix + std::vector mVectorOfIDs; ///< Vector of topologies and groups + std::unordered_map mCommonMap; ///< Map of pair + std::unordered_map mGroupMap; ///< Map of pair + + void print() const noexcept; + + ClassDefNV(TopologyDictionaryData, 1); +}; + class TopologyDictionary { public: @@ -32,91 +44,108 @@ class TopologyDictionary /// constexpr for the definition of the groups 
of rare topologies. /// The attritbution of the group ID is stringly dependent on the following parameters: it must be a power of 2. - static constexpr int RowClassSpan = 4; ///< Row span of the classes of rare topologies - static constexpr int ColClassSpan = 4; ///< Column span of the classes of rare topologies - static constexpr int MaxNumberOfRowClasses = 1 + (itsmft::ClusterPattern::MaxRowSpan - 1) / RowClassSpan; ///< Maximum number of row classes for the groups of rare topologies - static constexpr int MaxNumberOfColClasses = 1 + (itsmft::ClusterPattern::MaxColSpan - 1) / ColClassSpan; ///< Maximum number of col classes for the groups of rare topologies - static constexpr int NumberOfRareGroups = MaxNumberOfRowClasses * MaxNumberOfColClasses; ///< Number of entries corresponding to groups of rare topologies (those whos matrix exceed the max number of bytes are empty). + static constexpr int RowClassSpan = 4; ///< Row span of the classes of rare topologies + static constexpr int ColClassSpan = 4; ///< Column span of the classes of rare topologies + static constexpr int MaxNumberOfRowClasses = 1 + ((itsmft::ClusterPattern::MaxRowSpan - 1) / RowClassSpan); ///< Maximum number of row classes for the groups of rare topologies + static constexpr int MaxNumberOfColClasses = 1 + ((itsmft::ClusterPattern::MaxColSpan - 1) / ColClassSpan); ///< Maximum number of col classes for the groups of rare topologies + static constexpr int NumberOfRareGroups = MaxNumberOfRowClasses * MaxNumberOfColClasses; ///< Number of entries corresponding to groups of rare topologies (those whos matrix exceed the max number of bytes are empty). 
+ /// Resets internal structures + void reset() noexcept; + void resetMaps(bool IB = true) noexcept; /// Prints the dictionary friend std::ostream& operator<<(std::ostream& os, const its3::TopologyDictionary& dictionary); /// Prints the dictionary in a binary file void writeBinaryFile(const std::string& outputFile); /// Reads the dictionary from a binary file - int readBinaryFile(const std::string& fileName); - - int readFromFile(const std::string& fileName); + void readBinaryFile(const std::string& fileName); + void readFromFile(const std::string& fileName); + void print() const noexcept; /// Returns the x position of the COG for the n_th element - inline float getXCOG(int n) const + [[nodiscard]] float getXCOG(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mXCOG; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mXCOG; } /// Returns the error on the x position of the COG for the n_th element - inline float getErrX(int n) const + [[nodiscard]] float getErrX(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mErrX; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mErrX; } /// Returns the z position of the COG for the n_th element - inline float getZCOG(int n) const + [[nodiscard]] float getZCOG(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mZCOG; + const auto& data = (IB) ? 
mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mZCOG; } /// Returns the error on the z position of the COG for the n_th element - inline float getErrZ(int n) const + [[nodiscard]] float getErrZ(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mErrZ; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mErrZ; } /// Returns the error^2 on the x position of the COG for the n_th element - inline float getErr2X(int n) const + [[nodiscard]] float getErr2X(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mErr2X; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mErr2X; } /// Returns the error^2 on the z position of the COG for the n_th element - inline float getErr2Z(int n) const + [[nodiscard]] float getErr2Z(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mErr2Z; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mErr2Z; } /// Returns the hash of the n_th element - inline unsigned long getHash(int n) const + [[nodiscard]] unsigned long getHash(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mHash; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mHash; } /// Returns the number of fired pixels of the n_th element - inline int getNpixels(int n) const + [[nodiscard]] int getNpixels(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mNpixels; + const auto& data = (IB) ? 
mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mNpixels; } /// Returns the frequency of the n_th element; - inline double getFrequency(int n) const + [[nodiscard]] double getFrequency(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mFrequency; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mFrequency; } /// Returns true if the element corresponds to a group of rare topologies - inline bool isGroup(int n) const + [[nodiscard]] bool isGroup(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mIsGroup; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mIsGroup; } /// Returns the pattern of the topology - inline const itsmft::ClusterPattern& getPattern(int n) const + [[nodiscard]] const itsmft::ClusterPattern& getPattern(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mPattern; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mPattern; } /// Fills a hostogram with the distribution of the IDs - TH1F* getTopologyDistribution(const std::string_view hname = "h_topo_dist") const; + [[nodiscard]] TH1F* getTopologyDistribution(const std::string_view hname, bool IB = true) const; /// Returns the number of elements in the dicionary; - int getSize() const { return (int)mVectorOfIDs.size(); } + [[nodiscard]] int getSize(bool IB) const + { + return static_cast((IB) ? 
mDataIB.mVectorOfIDs.size() : mDataOB.mVectorOfIDs.size()); + } /// Returns the local position of a compact cluster /// Returns the local position of a compact cluster @@ -133,13 +162,10 @@ class TopologyDictionary friend its3::LookUp; private: - static constexpr int STopoSize{8 * 255 + 1}; - std::unordered_map mCommonMap{}; ///< Map of pair - std::unordered_map mGroupMap{}; ///< Map of pair - int mSmallTopologiesLUT[STopoSize]{}; ///< Look-Up Table for the topologies with 1-byte linearised matrix - std::vector mVectorOfIDs{}; ///< Vector of topologies and groups + TopologyDictionaryData mDataIB; + TopologyDictionaryData mDataOB; - ClassDefNV(TopologyDictionary, 3); + ClassDefNV(TopologyDictionary, 4); }; } // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/BuildTopologyDictionary.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/BuildTopologyDictionary.cxx index 87ad450eecd9e..f7eec52f9434a 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/BuildTopologyDictionary.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/BuildTopologyDictionary.cxx @@ -14,7 +14,9 @@ #include "ITS3Reconstruction/BuildTopologyDictionary.h" #include "ITS3Reconstruction/LookUp.h" #include "DataFormatsITSMFT/CompCluster.h" -#include "ITS3Base/SegmentationSuperAlpide.h" + +#include "ITSMFTBase/SegmentationAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "TFile.h" @@ -22,14 +24,25 @@ ClassImp(o2::its3::BuildTopologyDictionary); namespace o2::its3 { -void BuildTopologyDictionary::accountTopology(const itsmft::ClusterTopology& cluster, float dX, float dZ) +void BuildTopologyDictionary::accountTopology(const itsmft::ClusterTopology& cluster, bool IB, float dX, float dZ) { - mTotClusters++; + accountTopologyImpl(cluster, + ((IB) ? mMapInfoIB : mMapInfoOB), + ((IB) ? mTopologyMapIB : mTopologyMapOB), + ((IB) ? mTotClustersIB : mTotClustersOB), + ((IB) ? SegmentationMosaix::PitchRow : itsmft::SegmentationAlpide::PitchRow), + ((IB) ? 
SegmentationMosaix::PitchCol : itsmft::SegmentationAlpide::PitchCol), + dX, dZ); +} + +void BuildTopologyDictionary::accountTopologyImpl(const itsmft::ClusterTopology& cluster, TopoInfo& tinfo, TopoStat& tstat, unsigned int& tot, float sigmaX, float sigmaZ, float dX, float dZ) +{ + ++tot; bool useDf = dX < IgnoreVal / 2; // we may need to account the frequency but to not update the centroid // std::pair::iterator,bool> ret; // auto ret = mTopologyMap.insert(std::make_pair(cluster.getHash(), std::make_pair(cluster, 1))); - auto& topoStat = mTopologyMap[cluster.getHash()]; + auto& topoStat = tstat[cluster.getHash()]; topoStat.countsTotal++; if (topoStat.countsTotal == 1) { // a new topology is inserted topoStat.topology = cluster; @@ -45,14 +58,14 @@ void BuildTopologyDictionary::accountTopology(const itsmft::ClusterTopology& clu topInf.mZmean = dZ; topoStat.countsWithBias = 1; } else { // assign expected sigmas from the pixel X, Z sizes - topInf.mXsigma2 = 1.f / 12.f / (float)std::min(10, topInf.mSizeX); - topInf.mZsigma2 = 1.f / 12.f / (float)std::min(10, topInf.mSizeZ); + topInf.mXsigma2 = sigmaX * sigmaX / 12.f / (float)std::min(10, topInf.mSizeX); + topInf.mZsigma2 = sigmaZ * sigmaZ / (float)std::min(10, topInf.mSizeZ); } - mMapInfo.emplace(cluster.getHash(), topInf); + tinfo.emplace(cluster.getHash(), topInf); } else { if (useDf) { auto num = topoStat.countsWithBias++; - auto ind = mMapInfo.find(cluster.getHash()); + auto ind = tinfo.find(cluster.getHash()); float tmpxMean = ind->second.mXmean; float newxMean = ind->second.mXmean = ((tmpxMean)*num + dX) / (num + 1); float tmpxSigma2 = ind->second.mXsigma2; @@ -65,101 +78,135 @@ void BuildTopologyDictionary::accountTopology(const itsmft::ClusterTopology& clu } } -void BuildTopologyDictionary::setThreshold(double thr) +void BuildTopologyDictionary::setNCommon(unsigned int nCommon, bool IB) +{ + mDictionary.resetMaps(IB); + + auto& freqTopo = ((IB) ? 
mTopologyFrequencyIB : mTopologyFrequencyOB); + auto& freqThres = ((IB) ? mFrequencyThresholdIB : mFrequencyThresholdOB); + auto& comTopo = ((IB) ? mNCommonTopologiesIB : mNCommonTopologiesOB); + auto ntot = ((IB) ? mTotClustersIB : mTotClustersOB); + + setNCommonImpl(nCommon, + freqTopo, + ((IB) ? mTopologyMapIB : mTopologyMapOB), + comTopo, + ntot); + // Recaculate also the threshold + freqThres = ((double)freqTopo[comTopo - 1].first) / ntot; +} + +void BuildTopologyDictionary::setNCommonImpl(unsigned int ncom, TopoFreq& tfreq, TopoStat& tstat, unsigned int& ncommon, unsigned int ntot) { - mTopologyFrequency.clear(); - for (auto&& p : mTopologyMap) { // p is pair - mTopologyFrequency.emplace_back(p.second.countsTotal, p.first); + if (ncom >= itsmft::CompCluster::InvalidPatternID) { + LOGP(warning, "Redefining nCommon from {} to {} to be below InvalidPatternID", ncom, itsmft::CompCluster::InvalidPatternID - 1); + ncom = itsmft::CompCluster::InvalidPatternID - 1; + } + tfreq.clear(); + for (auto&& p : tstat) { // p os pair + tfreq.emplace_back(p.second.countsTotal, p.first); } - std::sort(mTopologyFrequency.begin(), mTopologyFrequency.end(), + std::sort(tfreq.begin(), tfreq.end(), [](const std::pair& couple1, const std::pair& couple2) { return (couple1.first > couple2.first); }); - mNCommonTopologies = 0; - mDictionary.mCommonMap.clear(); - mDictionary.mGroupMap.clear(); - mFrequencyThreshold = thr; - for (auto& q : mTopologyFrequency) { - if (((double)q.first) / mTotClusters > thr) { - mNCommonTopologies++; + ncommon = ncom; +} + +void BuildTopologyDictionary::setThreshold(double thr, bool IB) +{ + mDictionary.resetMaps(IB); + setThresholdImpl(thr, + ((IB) ? mTopologyFrequencyIB : mTopologyFrequencyOB), + ((IB) ? mMapInfoIB : mMapInfoOB), + ((IB) ? mTopologyMapIB : mTopologyMapOB), + ((IB) ? mNCommonTopologiesIB : mNCommonTopologiesOB), + ((IB) ? mFrequencyThresholdIB : mFrequencyThresholdOB), + ((IB) ? 
mTotClustersIB : mTotClustersOB)); +} + +void BuildTopologyDictionary::setThresholdImpl(double thr, TopoFreq& tfreq, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ncommon, double& freqthres, unsigned int ntot) +{ + setNCommonImpl(0, tfreq, tstat, ncommon, ntot); + freqthres = thr; + for (auto& q : tfreq) { + if (((double)q.first) / ntot > thr) { + ++ncommon; } else { break; } } - if (mNCommonTopologies >= itsmft::CompCluster::InvalidPatternID) { - mFrequencyThreshold = ((double)mTopologyFrequency[itsmft::CompCluster::InvalidPatternID - 1].first) / mTotClusters; - LOGP(warning, "Redefining prob. threshould from {} to {} to be below InvalidPatternID (was {})", thr, mFrequencyThreshold, mNCommonTopologies); - mNCommonTopologies = itsmft::CompCluster::InvalidPatternID - 1; + if (ncommon >= itsmft::CompCluster::InvalidPatternID) { + freqthres = ((double)tfreq[itsmft::CompCluster::InvalidPatternID - 1].first) / ntot; + LOGP(warning, "Redefining prob. threshold from {} to {} to be below InvalidPatternID (was {})", thr, freqthres, ntot); + ncommon = itsmft::CompCluster::InvalidPatternID - 1; } } -void BuildTopologyDictionary::setNCommon(unsigned int nCommon) +void BuildTopologyDictionary::setThresholdCumulative(double cumulative, bool IB) { - if (nCommon >= itsmft::CompCluster::InvalidPatternID) { - LOGP(warning, "Redefining nCommon from {} to {} to be below InvalidPatternID", nCommon, itsmft::CompCluster::InvalidPatternID - 1); - nCommon = itsmft::CompCluster::InvalidPatternID - 1; - } - mTopologyFrequency.clear(); - for (auto&& p : mTopologyMap) { // p os pair - mTopologyFrequency.emplace_back(p.second.countsTotal, p.first); + if (cumulative <= 0. || cumulative >= 1.) 
{ + cumulative = 0.99; } - std::sort(mTopologyFrequency.begin(), mTopologyFrequency.end(), - [](const std::pair& couple1, - const std::pair& couple2) { return (couple1.first > couple2.first); }); - mNCommonTopologies = nCommon; - mDictionary.mCommonMap.clear(); - mDictionary.mGroupMap.clear(); - mFrequencyThreshold = ((double)mTopologyFrequency[mNCommonTopologies - 1].first) / mTotClusters; + + auto& freqTopo = ((IB) ? mTopologyFrequencyIB : mTopologyFrequencyOB); + auto& freqThres = ((IB) ? mFrequencyThresholdIB : mFrequencyThresholdOB); + auto& statTopo = ((IB) ? mTopologyMapIB : mTopologyMapOB); + auto& comTopo = ((IB) ? mNCommonTopologiesIB : mNCommonTopologiesOB); + auto ntot = ((IB) ? mTotClustersIB : mTotClustersOB); + + mDictionary.resetMaps(IB); + setNCommonImpl(0, freqTopo, statTopo, comTopo, ntot); + setThresholdCumulativeImpl(cumulative, freqTopo, comTopo, freqThres, ntot); } -void BuildTopologyDictionary::setThresholdCumulative(double cumulative) +void BuildTopologyDictionary::setThresholdCumulativeImpl(double cumulative, TopoFreq& tfreq, unsigned int& ncommon, double& freqthres, unsigned int ntot) { - mTopologyFrequency.clear(); - if (cumulative <= 0. || cumulative >= 1.) 
{ - cumulative = 0.99; - } double totFreq = 0.; - for (auto&& p : mTopologyMap) { // p os pair - mTopologyFrequency.emplace_back(p.second.countsTotal, p.first); - } - std::sort(mTopologyFrequency.begin(), mTopologyFrequency.end(), - [](const std::pair& couple1, - const std::pair& couple2) { return (couple1.first > couple2.first); }); - mNCommonTopologies = 0; - mDictionary.mCommonMap.clear(); - mDictionary.mGroupMap.clear(); - for (auto& q : mTopologyFrequency) { - totFreq += ((double)(q.first)) / mTotClusters; + for (auto& q : tfreq) { + totFreq += ((double)(q.first)) / ntot; if (totFreq < cumulative) { - mNCommonTopologies++; - if (mNCommonTopologies >= itsmft::CompCluster::InvalidPatternID) { - totFreq -= ((double)(q.first)) / mTotClusters; - mNCommonTopologies--; + ++ncommon; + if (ncommon >= itsmft::CompCluster::InvalidPatternID) { + totFreq -= ((double)(q.first)) / ntot; + --ncommon; LOGP(warning, "Redefining cumulative threshould from {} to {} to be below InvalidPatternID)", cumulative, totFreq); } } else { break; } } - mFrequencyThreshold = ((double)(mTopologyFrequency[--mNCommonTopologies].first)) / mTotClusters; - while (std::fabs(((double)mTopologyFrequency[mNCommonTopologies].first) / mTotClusters - mFrequencyThreshold) < 1.e-15) { - mNCommonTopologies--; + freqthres = ((double)(tfreq[--ncommon].first)) / ntot; + while (std::fabs(((double)tfreq[ncommon--].first) / ntot - freqthres) < 1.e-15) { } - mFrequencyThreshold = ((double)mTopologyFrequency[mNCommonTopologies++].first) / mTotClusters; + freqthres = ((double)tfreq[ncommon++].first) / ntot; } void BuildTopologyDictionary::groupRareTopologies() { LOG(info) << "Dictionary finalisation"; - LOG(info) << "Number of clusters: " << mTotClusters; + LOG(info) << "Number of IB clusters: " << mTotClustersIB; + LOG(info) << "Number of OB clusters: " << mTotClustersOB; + + groupRareTopologiesImpl(mTopologyFrequencyIB, mMapInfoIB, mTopologyMapIB, mNCommonTopologiesIB, mFrequencyThresholdIB, mDictionary.mDataIB, 
mNCommonTopologiesIB); + groupRareTopologiesImpl(mTopologyFrequencyOB, mMapInfoOB, mTopologyMapOB, mNCommonTopologiesOB, mFrequencyThresholdOB, mDictionary.mDataOB, mNCommonTopologiesOB); + + LOG(info) << "Dictionay finalised"; + LOG(info) << "IB:"; + mDictionary.mDataIB.print(); + LOG(info) << "OB:"; + mDictionary.mDataOB.print(); +} +void BuildTopologyDictionary::groupRareTopologiesImpl(TopoFreq& tfreq, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ncommon, double& freqthres, TopologyDictionaryData& data, unsigned int ntot) +{ double totFreq = 0.; - for (unsigned int j = 0; j < mNCommonTopologies; j++) { + for (unsigned int j = 0; j < ncommon; j++) { itsmft::GroupStruct gr; - gr.mHash = mTopologyFrequency[j].second; - gr.mFrequency = ((double)(mTopologyFrequency[j].first)) / mTotClusters; + gr.mHash = tfreq[j].second; + gr.mFrequency = ((double)(tfreq[j].first)) / ntot; totFreq += gr.mFrequency; // rough estimation for the error considering a8 uniform distribution - const auto& topo = mMapInfo.find(gr.mHash)->second; + const auto& topo = tinfo.find(gr.mHash)->second; gr.mErrX = std::sqrt(topo.mXsigma2); gr.mErrZ = std::sqrt(topo.mZsigma2); gr.mErr2X = topo.mXsigma2; @@ -169,11 +216,11 @@ void BuildTopologyDictionary::groupRareTopologies() gr.mNpixels = topo.mNpixels; gr.mPattern = topo.mPattern; gr.mIsGroup = false; - mDictionary.mVectorOfIDs.push_back(gr); + data.mVectorOfIDs.push_back(gr); if (j == int(itsmft::CompCluster::InvalidPatternID - 1)) { LOGP(warning, "Limiting N unique topologies to {}, threshold freq. to {}, cumulative freq. 
to {} to be below InvalidPatternID", j, gr.mFrequency, totFreq); - mNCommonTopologies = j; - mFrequencyThreshold = gr.mFrequency; + ncommon = j; + freqthres = gr.mFrequency; break; } } @@ -193,8 +240,8 @@ void BuildTopologyDictionary::groupRareTopologies() // Create a structure for a group of rare topologies itsmft::GroupStruct gr; gr.mHash = (((unsigned long)(grNum)) << 32) & 0xffffffff00000000; - gr.mErrX = its3::TopologyDictionary::RowClassSpan / std::sqrt(12 * std::min(10, rowBinEdge)); - gr.mErrZ = its3::TopologyDictionary::ColClassSpan / std::sqrt(12 * std::min(10, colBinEdge)); + gr.mErrX = its3::TopologyDictionary::RowClassSpan / std::sqrt(12.f * (float)std::min(10, rowBinEdge)); + gr.mErrZ = its3::TopologyDictionary::ColClassSpan / std::sqrt(12.f * (float)std::min(10, colBinEdge)); gr.mErr2X = gr.mErrX * gr.mErrX; gr.mErr2Z = gr.mErrZ * gr.mErrZ; gr.mXCOG = 0; @@ -228,58 +275,65 @@ void BuildTopologyDictionary::groupRareTopologies() int rs{}, cs{}, index{}; // Updating the counts for the groups of rare topologies - for (auto j{mNCommonTopologies}; j < mTopologyFrequency.size(); j++) { - unsigned long hash1 = mTopologyFrequency[j].second; - rs = mTopologyMap.find(hash1)->second.topology.getRowSpan(); - cs = mTopologyMap.find(hash1)->second.topology.getColumnSpan(); + for (auto j{ncommon}; j < tfreq.size(); j++) { + unsigned long hash1 = tfreq[j].second; + rs = tstat.find(hash1)->second.topology.getRowSpan(); + cs = tstat.find(hash1)->second.topology.getColumnSpan(); index = its3::LookUp::groupFinder(rs, cs); - tmp_GroupMap[index].second += mTopologyFrequency[j].first; + tmp_GroupMap[index].second += tfreq[j].first; } for (auto&& p : tmp_GroupMap) { itsmft::GroupStruct& group = p.second.first; - group.mFrequency = ((double)p.second.second) / mTotClusters; - mDictionary.mVectorOfIDs.push_back(group); + group.mFrequency = ((double)p.second.second) / ntot; + data.mVectorOfIDs.push_back(group); } // Sorting the dictionary preserving all unique topologies - 
std::sort(mDictionary.mVectorOfIDs.begin(), mDictionary.mVectorOfIDs.end(), [](const itsmft::GroupStruct& a, const itsmft::GroupStruct& b) { + std::sort(data.mVectorOfIDs.begin(), data.mVectorOfIDs.end(), [](const itsmft::GroupStruct& a, const itsmft::GroupStruct& b) { return (!a.mIsGroup) && b.mIsGroup ? true : (a.mIsGroup && (!b.mIsGroup) ? false : (a.mFrequency > b.mFrequency)); }); - if (mDictionary.mVectorOfIDs.size() >= itsmft::CompCluster::InvalidPatternID - 1) { + if (data.mVectorOfIDs.size() >= itsmft::CompCluster::InvalidPatternID - 1) { LOGP(warning, "Max allowed {} patterns is reached, stopping", itsmft::CompCluster::InvalidPatternID - 1); - mDictionary.mVectorOfIDs.resize(itsmft::CompCluster::InvalidPatternID - 1); + data.mVectorOfIDs.resize(itsmft::CompCluster::InvalidPatternID - 1); } // Sorting the dictionary to final form - std::sort(mDictionary.mVectorOfIDs.begin(), mDictionary.mVectorOfIDs.end(), [](const itsmft::GroupStruct& a, const itsmft::GroupStruct& b) { return a.mFrequency > b.mFrequency; }); + std::sort(data.mVectorOfIDs.begin(), data.mVectorOfIDs.end(), [](const itsmft::GroupStruct& a, const itsmft::GroupStruct& b) { return a.mFrequency > b.mFrequency; }); // Creating the map for common topologies - for (int iKey = 0; iKey < mDictionary.getSize(); iKey++) { - itsmft::GroupStruct& gr = mDictionary.mVectorOfIDs[iKey]; + for (int iKey = 0; iKey < data.mVectorOfIDs.size(); iKey++) { + itsmft::GroupStruct& gr = data.mVectorOfIDs[iKey]; if (!gr.mIsGroup) { - mDictionary.mCommonMap.emplace(gr.mHash, iKey); + data.mCommonMap.emplace(gr.mHash, iKey); if (gr.mPattern.getUsedBytes() == 1) { - mDictionary.mSmallTopologiesLUT[(gr.mPattern.getColumnSpan() - 1) * 255 + (int)gr.mPattern.getByte(2)] = iKey; + data.mSmallTopologiesLUT[(gr.mPattern.getColumnSpan() - 1) * 255 + (int)gr.mPattern.getByte(2)] = iKey; } } else { - mDictionary.mGroupMap.emplace((int)(gr.mHash >> 32) & 0x00000000ffffffff, iKey); + data.mGroupMap.emplace((int)(gr.mHash >> 32) & 
0x00000000ffffffff, iKey); } } - LOG(info) << "Dictionay finalised"; - LOG(info) << "Number of keys: " << mDictionary.getSize(); - LOG(info) << "Number of common topologies: " << mDictionary.mCommonMap.size(); - LOG(info) << "Number of groups of rare topologies: " << mDictionary.mGroupMap.size(); } std::ostream& operator<<(std::ostream& os, const BuildTopologyDictionary& DB) { - for (unsigned int i = 0; i < DB.mNCommonTopologies; i++) { - const unsigned long& hash = DB.mTopologyFrequency[i].second; + os << "--- InnerBarrel\n"; + for (unsigned int i = 0; i < DB.mNCommonTopologiesIB; i++) { + const unsigned long& hash = DB.mTopologyFrequencyIB[i].second; + os << "Hash: " << hash << '\n'; + os << "counts: " << DB.mTopologyMapIB.find(hash)->second.countsTotal; + os << " (with bias provided: " << DB.mTopologyMapIB.find(hash)->second.countsWithBias << ")" << '\n'; + os << "sigmaX: " << std::sqrt(DB.mMapInfoIB.find(hash)->second.mXsigma2) << '\n'; + os << "sigmaZ: " << std::sqrt(DB.mMapInfoIB.find(hash)->second.mZsigma2) << '\n'; + os << DB.mTopologyMapIB.find(hash)->second.topology; + } + os << "--- OuterBarrel\n"; + for (unsigned int i = 0; i < DB.mNCommonTopologiesOB; i++) { + const unsigned long& hash = DB.mTopologyFrequencyOB[i].second; os << "Hash: " << hash << '\n'; - os << "counts: " << DB.mTopologyMap.find(hash)->second.countsTotal; - os << " (with bias provided: " << DB.mTopologyMap.find(hash)->second.countsWithBias << ")" << '\n'; - os << "sigmaX: " << std::sqrt(DB.mMapInfo.find(hash)->second.mXsigma2) << '\n'; - os << "sigmaZ: " << std::sqrt(DB.mMapInfo.find(hash)->second.mZsigma2) << '\n'; - os << DB.mTopologyMap.find(hash)->second.topology; + os << "counts: " << DB.mTopologyMapOB.find(hash)->second.countsTotal; + os << " (with bias provided: " << DB.mTopologyMapOB.find(hash)->second.countsWithBias << ")" << '\n'; + os << "sigmaX: " << std::sqrt(DB.mMapInfoOB.find(hash)->second.mXsigma2) << '\n'; + os << "sigmaZ: " << 
std::sqrt(DB.mMapInfoOB.find(hash)->second.mZsigma2) << '\n'; + os << DB.mTopologyMapOB.find(hash)->second.topology; } return os; } diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/Clusterer.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/Clusterer.cxx index 90f5245bcef58..bce17b3759340 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/Clusterer.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/Clusterer.cxx @@ -12,15 +12,14 @@ /// \file Clusterer.cxx /// \brief Implementation of the ITS cluster finder -#include "ITS3Reconstruction/Clusterer.h" +#include -#include -#include "Framework/Logger.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Reconstruction/Clusterer.h" +#include "ITS3Base/SegmentationMosaix.h" #include "SimulationDataFormat/MCTruthContainer.h" #include "CommonDataFormat/InteractionRecord.h" -#include +#include "TTree.h" #ifdef WITH_OPENMP #include @@ -252,7 +251,7 @@ void Clusterer::ClustererThread::finishChip(ChipPixelData* curChipData, CompClus preClusterIndices[i2] = -1; } if (bbox.isAcceptableSize()) { - parent->streamCluster(pixArrBuff, &labelsBuff, bbox, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab); + parent->streamCluster(pixArrBuff, &labelsBuff, bbox, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab, constants::detID::isDetITS3(curChipData->getChipID())); } else { auto warnLeft = MaxHugeClusWarn - parent->mNHugeClus; if (warnLeft > 0) { @@ -278,7 +277,7 @@ void Clusterer::ClustererThread::finishChip(ChipPixelData* curChipData, CompClus } } if (!pixbuf.empty()) { // Stream a piece of cluster only if the reduced bounding box is not empty - parent->streamCluster(pixbuf, &labelsBuff, bboxT, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab, true); + parent->streamCluster(pixbuf, &labelsBuff, bboxT, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab, constants::detID::isDetITS3(curChipData->getChipID()), true); 
pixbuf.clear(); } bboxT.rowMin = bboxT.rowMax + 1; @@ -305,10 +304,12 @@ void Clusterer::ClustererThread::finishChipSingleHitFast(uint32_t hit, ChipPixel } } + auto ib = constants::detID::isDetITS3(curChipData->getChipID()); + // add to compact clusters, which must be always filled unsigned char patt[ClusterPattern::MaxPatternBytes]{0x1 << (7 - (0 % 8))}; // unrolled 1 hit version of full loop in finishChip - uint16_t pattID = (parent->mPattIdConverter.size() == 0) ? CompCluster::InvalidPatternID : parent->mPattIdConverter.findGroupID(1, 1, patt); - if ((pattID == CompCluster::InvalidPatternID || parent->mPattIdConverter.isGroup(pattID)) && patternsPtr) { + uint16_t pattID = (parent->mPattIdConverter.size(ib) == 0) ? CompCluster::InvalidPatternID : parent->mPattIdConverter.findGroupID(1, 1, ib, patt); + if ((pattID == CompCluster::InvalidPatternID || parent->mPattIdConverter.isGroup(pattID, ib)) && patternsPtr) { patternsPtr->emplace_back(1); // rowspan patternsPtr->emplace_back(1); // colspan patternsPtr->insert(patternsPtr->end(), std::begin(patt), std::begin(patt) + 1); @@ -334,7 +335,7 @@ void Clusterer::ClustererThread::initChip(const ChipPixelData* curChipData, uint size = itsmft::SegmentationAlpide::NRows + 2; int chipId = curChipData->getChipID(); if (its3::constants::detID::isDetITS3(chipId)) { - size = its3::SegmentationSuperAlpide::mNRows + 2; + size = its3::SegmentationMosaix::NRows + 2; } delete[] column1; diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx index 50e651f7f5675..58dd56ac41f95 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx @@ -16,8 +16,6 @@ #include "DataFormatsITSMFT/ROFRecord.h" #include "ITS3Reconstruction/TopologyDictionary.h" #include "ITSBase/GeometryTGeo.h" -#include "ITSMFTBase/SegmentationAlpide.h" -#include "ITS3Base/SegmentationSuperAlpide.h" #include "ITS3Base/SpecsV2.h" 
#include "ITStracking/TrackingConfigParam.h" #include "Framework/Logger.h" @@ -80,7 +78,6 @@ int loadROFrameDataITS3(its::TimeFrame* tf, auto isITS3 = its3::constants::detID::isDetITS3(sensorID); auto layer = geom->getLayer(sensorID); - auto pattID = c.getPatternID(); float sigmaY2{0}, sigmaZ2{0}, sigmaYZ{0}; uint8_t clusterSize{0}; auto locXYZ = extractClusterData(c, pattIt, dict, sigmaY2, sigmaZ2, clusterSize); diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/ITS3ReconstructionLinkDef.h b/Detectors/Upgrades/ITS3/reconstruction/src/ITS3ReconstructionLinkDef.h index f19a7fcaba9ca..2ebd89970d9a1 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/ITS3ReconstructionLinkDef.h +++ b/Detectors/Upgrades/ITS3/reconstruction/src/ITS3ReconstructionLinkDef.h @@ -16,6 +16,7 @@ #pragma link off all functions; #pragma link C++ class o2::its3::Clusterer + ; +#pragma link C++ class o2::its3::TopologyDictionaryData + ; #pragma link C++ class o2::its3::TopologyDictionary + ; #pragma link C++ class o2::its3::BuildTopologyDictionary + ; #pragma link C++ class o2::its3::LookUp + ; diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/LookUp.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/LookUp.cxx index caabfa6f2decb..e137e091dc631 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/LookUp.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/LookUp.cxx @@ -31,7 +31,8 @@ LookUp::LookUp(std::string fileName) void LookUp::loadDictionary(std::string fileName) { mDictionary.readFromFile(fileName); - mTopologiesOverThreshold = mDictionary.mCommonMap.size(); + mTopologiesOverThresholdIB = mDictionary.mDataIB.mCommonMap.size(); + mTopologiesOverThresholdOB = mDictionary.mDataOB.mCommonMap.size(); } void LookUp::setDictionary(const its3::TopologyDictionary* dict) @@ -39,7 +40,8 @@ void LookUp::setDictionary(const its3::TopologyDictionary* dict) if (dict != nullptr) { mDictionary = *dict; } - mTopologiesOverThreshold = mDictionary.mCommonMap.size(); + 
mTopologiesOverThresholdIB = mDictionary.mDataIB.mCommonMap.size(); + mTopologiesOverThresholdOB = mDictionary.mDataOB.mCommonMap.size(); } int LookUp::groupFinder(int nRow, int nCol) @@ -61,25 +63,26 @@ int LookUp::groupFinder(int nRow, int nCol) return grNum; } -int LookUp::findGroupID(int nRow, int nCol, const unsigned char patt[itsmft::ClusterPattern::MaxPatternBytes]) const +int LookUp::findGroupID(int nRow, int nCol, bool IB, const unsigned char patt[itsmft::ClusterPattern::MaxPatternBytes]) const { + const auto& data = (IB) ? mDictionary.mDataIB : mDictionary.mDataOB; int nBits = nRow * nCol; if (nBits < 9) { // Small unique topology - int ID = mDictionary.mSmallTopologiesLUT[(nCol - 1) * 255 + (int)patt[0]]; + int ID = data.mSmallTopologiesLUT[(nCol - 1) * 255 + (int)patt[0]]; if (ID >= 0) { return ID; } } else { // Big unique topology unsigned long hash = itsmft::ClusterTopology::getCompleteHash(nRow, nCol, patt); - auto ret = mDictionary.mCommonMap.find(hash); - if (ret != mDictionary.mCommonMap.end()) { + auto ret = data.mCommonMap.find(hash); + if (ret != data.mCommonMap.end()) { return ret->second; } } - if (!mDictionary.mGroupMap.empty()) { // rare valid topology group + if (!data.mGroupMap.empty()) { // rare valid topology group int index = groupFinder(nRow, nCol); - auto res = mDictionary.mGroupMap.find(index); - return res == mDictionary.mGroupMap.end() ? itsmft::CompCluster::InvalidPatternID : res->second; + auto res = data.mGroupMap.find(index); + return res == data.mGroupMap.end() ? 
itsmft::CompCluster::InvalidPatternID : res->second; } return itsmft::CompCluster::InvalidPatternID; } diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/TopologyDictionary.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/TopologyDictionary.cxx index 66a4b0a6878cd..61ab051ffb565 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/TopologyDictionary.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/TopologyDictionary.cxx @@ -12,7 +12,7 @@ /// \file TopologyDictionary.cxx #include "ITS3Reconstruction/TopologyDictionary.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITSMFTBase/SegmentationAlpide.h" #include "CommonUtils/StringUtils.h" #include @@ -23,9 +23,16 @@ ClassImp(o2::its3::TopologyDictionary); namespace o2::its3 { +void TopologyDictionaryData::print() const noexcept +{ + LOG(info) << "Number of keys: " << mVectorOfIDs.size(); + LOG(info) << "Number of common topologies: " << mCommonMap.size(); + LOG(info) << "Number of groups of rare topologies: " << mGroupMap.size(); +} + TopologyDictionary::TopologyDictionary() { - memset(mSmallTopologiesLUT, -1, STopoSize * sizeof(int)); + reset(); } TopologyDictionary::TopologyDictionary(const std::string& fileName) @@ -33,10 +40,43 @@ TopologyDictionary::TopologyDictionary(const std::string& fileName) readFromFile(fileName); } +void TopologyDictionary::print() const noexcept +{ + LOG(info) << "ITS3 TopologyDictionary"; + LOG(info) << "InnerBarrel"; + mDataIB.print(); + LOG(info) << "OuterBarrel"; + mDataOB.print(); +} + +void TopologyDictionary::reset() noexcept +{ + mDataIB.mSmallTopologiesLUT.fill(-1); + mDataOB.mSmallTopologiesLUT.fill(-1); + mDataIB.mVectorOfIDs.clear(); + mDataOB.mVectorOfIDs.clear(); +} + +void TopologyDictionary::resetMaps(bool IB) noexcept +{ + auto& data = (IB) ? 
mDataIB : mDataOB; + data.mCommonMap.clear(); + data.mGroupMap.clear(); +} + std::ostream& operator<<(std::ostream& os, const its3::TopologyDictionary& dict) { int ID = 0; - for (auto& p : dict.mVectorOfIDs) { + os << "--- InnerBarrel:\n"; + for (auto& p : dict.mDataIB.mVectorOfIDs) { + os << "ID: " << ID++ << " Hash: " << p.mHash << " ErrX: " << p.mErrX << " ErrZ : " << p.mErrZ << " xCOG: " << p.mXCOG << " zCOG: " << p.mZCOG << " Npixles: " << p.mNpixels << " Frequency: " << p.mFrequency << " isGroup : " << std::boolalpha << p.mIsGroup << '\n' + << p.mPattern << '\n' + << "*********************************************************" << '\n' + << '\n'; + } + ID = 0; + os << "--- OuterBarrel:\n"; + for (auto& p : dict.mDataOB.mVectorOfIDs) { os << "ID: " << ID++ << " Hash: " << p.mHash << " ErrX: " << p.mErrX << " ErrZ : " << p.mErrZ << " xCOG: " << p.mXCOG << " zCOG: " << p.mZCOG << " Npixles: " << p.mNpixels << " Frequency: " << p.mFrequency << " isGroup : " << std::boolalpha << p.mIsGroup << '\n' << p.mPattern << '\n' << "*********************************************************" << '\n' @@ -48,24 +88,36 @@ std::ostream& operator<<(std::ostream& os, const its3::TopologyDictionary& dict) void TopologyDictionary::writeBinaryFile(const std::string& outputfile) { std::ofstream file_output(outputfile, std::ios::out | std::ios::binary); - for (auto& p : mVectorOfIDs) { - file_output.write(reinterpret_cast(&p.mHash), sizeof(unsigned long)); - file_output.write(reinterpret_cast(&p.mErrX), sizeof(float)); - file_output.write(reinterpret_cast(&p.mErrZ), sizeof(float)); - file_output.write(reinterpret_cast(&p.mErr2X), sizeof(float)); - file_output.write(reinterpret_cast(&p.mErr2Z), sizeof(float)); - file_output.write(reinterpret_cast(&p.mXCOG), sizeof(float)); - file_output.write(reinterpret_cast(&p.mZCOG), sizeof(float)); - file_output.write(reinterpret_cast(&p.mNpixels), sizeof(int)); - file_output.write(reinterpret_cast(&p.mFrequency), sizeof(double)); - 
file_output.write(reinterpret_cast(&p.mIsGroup), sizeof(bool)); - file_output.write(const_cast(reinterpret_cast(&p.mPattern.getPattern())), - sizeof(unsigned char) * (itsmft::ClusterPattern::kExtendedPatternBytes)); + if (!file_output) { + throw std::runtime_error(fmt::format("Cannot open output file %s!", outputfile)); } + + auto writeData = [](auto& file_output, auto& data) { + auto size = data.mVectorOfIDs.size(); + file_output.write(reinterpret_cast(&size), sizeof(size)); + for (auto& p : data.mVectorOfIDs) { + file_output.write(reinterpret_cast(&p.mHash), sizeof(unsigned long)); + file_output.write(reinterpret_cast(&p.mErrX), sizeof(float)); + file_output.write(reinterpret_cast(&p.mErrZ), sizeof(float)); + file_output.write(reinterpret_cast(&p.mErr2X), sizeof(float)); + file_output.write(reinterpret_cast(&p.mErr2Z), sizeof(float)); + file_output.write(reinterpret_cast(&p.mXCOG), sizeof(float)); + file_output.write(reinterpret_cast(&p.mZCOG), sizeof(float)); + file_output.write(reinterpret_cast(&p.mNpixels), sizeof(int)); + file_output.write(reinterpret_cast(&p.mFrequency), sizeof(double)); + file_output.write(reinterpret_cast(&p.mIsGroup), sizeof(bool)); + file_output.write(const_cast(reinterpret_cast(&p.mPattern.getPattern())), + sizeof(unsigned char) * (itsmft::ClusterPattern::kExtendedPatternBytes)); + } + }; + + writeData(file_output, mDataIB); + writeData(file_output, mDataOB); + file_output.close(); } -int TopologyDictionary::readFromFile(const std::string& fname) +void TopologyDictionary::readFromFile(const std::string& fname) { LOGP(info, "Reading TopologyDictionary from File '{}'", fname); if (o2::utils::Str::endsWith(fname, ".root")) { @@ -76,59 +128,63 @@ int TopologyDictionary::readFromFile(const std::string& fname) } else { throw std::runtime_error(fmt::format("Unrecognized format {}", fname)); } - return 0; } -int TopologyDictionary::readBinaryFile(const std::string& fname) +void TopologyDictionary::readBinaryFile(const std::string& fname) { - 
mVectorOfIDs.clear(); - mCommonMap.clear(); - for (auto& p : mSmallTopologiesLUT) { - p = -1; - } + reset(); + std::ifstream in(fname.data(), std::ios::in | std::ios::binary); - itsmft::GroupStruct gr; - int groupID = 0; if (!in.is_open()) { LOG(error) << "The file " << fname << " coud not be opened"; throw std::runtime_error("The file coud not be opened"); } else { - while (in.read(reinterpret_cast(&gr.mHash), sizeof(unsigned long))) { - in.read(reinterpret_cast(&gr.mErrX), sizeof(float)); - in.read(reinterpret_cast(&gr.mErrZ), sizeof(float)); - in.read(reinterpret_cast(&gr.mErr2X), sizeof(float)); - in.read(reinterpret_cast(&gr.mErr2Z), sizeof(float)); - in.read(reinterpret_cast(&gr.mXCOG), sizeof(float)); - in.read(reinterpret_cast(&gr.mZCOG), sizeof(float)); - in.read(reinterpret_cast(&gr.mNpixels), sizeof(int)); - in.read(reinterpret_cast(&gr.mFrequency), sizeof(double)); - in.read(reinterpret_cast(&gr.mIsGroup), sizeof(bool)); - in.read(const_cast(reinterpret_cast(&gr.mPattern.getPattern())), sizeof(unsigned char) * (itsmft::ClusterPattern::kExtendedPatternBytes)); - mVectorOfIDs.push_back(gr); - if (!gr.mIsGroup) { - mCommonMap.insert(std::make_pair(gr.mHash, groupID)); - if (gr.mPattern.getUsedBytes() == 1) { - mSmallTopologiesLUT[(gr.mPattern.getColumnSpan() - 1) * 255 + (int)gr.mPattern.getByte(2)] = groupID; + + auto readData = [](auto& in, auto& data) { + int groupID = 0; + std::size_t size{}, cur{}; + itsmft::GroupStruct gr; + in.read(reinterpret_cast(&size), sizeof(std::size_t)); + while (cur++ != size) { + in.read(reinterpret_cast(&gr.mHash), sizeof(unsigned long)); + in.read(reinterpret_cast(&gr.mErrX), sizeof(float)); + in.read(reinterpret_cast(&gr.mErrZ), sizeof(float)); + in.read(reinterpret_cast(&gr.mErr2X), sizeof(float)); + in.read(reinterpret_cast(&gr.mErr2Z), sizeof(float)); + in.read(reinterpret_cast(&gr.mXCOG), sizeof(float)); + in.read(reinterpret_cast(&gr.mZCOG), sizeof(float)); + in.read(reinterpret_cast(&gr.mNpixels), sizeof(int)); + 
in.read(reinterpret_cast(&gr.mFrequency), sizeof(double)); + in.read(reinterpret_cast(&gr.mIsGroup), sizeof(bool)); + in.read(const_cast(reinterpret_cast(&gr.mPattern.getPattern())), sizeof(unsigned char) * (itsmft::ClusterPattern::kExtendedPatternBytes)); + data.mVectorOfIDs.push_back(gr); + if (!gr.mIsGroup) { + data.mCommonMap.insert(std::make_pair(gr.mHash, groupID)); + if (gr.mPattern.getUsedBytes() == 1) { + data.mSmallTopologiesLUT[(gr.mPattern.getColumnSpan() - 1) * 255 + (int)gr.mPattern.getByte(2)] = groupID; + } + } else { + data.mGroupMap.insert(std::make_pair((int)(gr.mHash >> 32) & 0x00000000ffffffff, groupID)); } - } else { - mGroupMap.insert(std::make_pair((int)(gr.mHash >> 32) & 0x00000000ffffffff, groupID)); + groupID++; } - groupID++; - } + }; + + readData(in, mDataIB); + readData(in, mDataOB); } in.close(); - return 0; } -TH1F* TopologyDictionary::getTopologyDistribution(const std::string_view hname) const +TH1F* TopologyDictionary::getTopologyDistribution(const std::string_view hname, bool IB) const { - int dictSize = getSize(); - auto* histo = new TH1F(hname.data(), ";Topology ID;Frequency", dictSize, -0.5, dictSize - 0.5); + int dictSize = getSize(IB); + auto* histo = new TH1F(hname.data(), Form("%s;Topology ID;Frequency", (IB) ? 
"InnerBarrel" : "OuterBarrel"), dictSize, -0.5, dictSize - 0.5); histo->SetFillColor(kRed); histo->SetFillStyle(3005); histo->SetDrawOption("histo"); for (int i = 0; i < dictSize; i++) { - histo->Fill(i, getFrequency(i)); + histo->Fill(i, getFrequency(i, IB)); } return histo; } @@ -136,18 +192,19 @@ TH1F* TopologyDictionary::getTopologyDistribution(const std::string_view hname) template math_utils::Point3D TopologyDictionary::getClusterCoordinates(const itsmft::CompClusterExt& cl) const { + static std::array mIBSegmentations{0, 1, 2}; math_utils::Point3D locCl; if (!its3::constants::detID::isDetITS3(cl.getSensorID())) { o2::itsmft::SegmentationAlpide::detectorToLocalUnchecked(cl.getRow(), cl.getCol(), locCl); - locCl.SetX(locCl.X() + this->getXCOG(cl.getPatternID()) * itsmft::SegmentationAlpide::PitchRow); - locCl.SetZ(locCl.Z() + this->getZCOG(cl.getPatternID()) * itsmft::SegmentationAlpide::PitchCol); + locCl.SetX(locCl.X() + this->getXCOG(cl.getPatternID(), false) * itsmft::SegmentationAlpide::PitchRow); + locCl.SetZ(locCl.Z() + this->getZCOG(cl.getPatternID(), false) * itsmft::SegmentationAlpide::PitchCol); } else { auto layer = its3::constants::detID::getDetID2Layer(cl.getSensorID()); - its3::SuperSegmentations[layer].detectorToLocalUnchecked(cl.getRow(), cl.getCol(), locCl); - locCl.SetX(locCl.X() + this->getXCOG(cl.getPatternID()) * its3::SegmentationSuperAlpide::mPitchRow); - locCl.SetZ(locCl.Z() + this->getZCOG(cl.getPatternID()) * its3::SegmentationSuperAlpide::mPitchCol); + mIBSegmentations[layer].detectorToLocalUnchecked(cl.getRow(), cl.getCol(), locCl); + locCl.SetX(locCl.X() + this->getXCOG(cl.getPatternID(), true) * its3::SegmentationMosaix::PitchRow); + locCl.SetZ(locCl.Z() + this->getZCOG(cl.getPatternID(), true) * its3::SegmentationMosaix::PitchCol); float xCurved{0.f}, yCurved{0.f}; - its3::SuperSegmentations[layer].flatToCurved(locCl.X(), locCl.Y(), xCurved, yCurved); + mIBSegmentations[layer].flatToCurved(locCl.X(), locCl.Y(), xCurved, 
yCurved); locCl.SetXYZ(xCurved, yCurved, locCl.Z()); } return locCl; @@ -156,6 +213,7 @@ math_utils::Point3D TopologyDictionary::getClusterCoordinates(const itsmft::C template math_utils::Point3D TopologyDictionary::getClusterCoordinates(const itsmft::CompClusterExt& cl, const itsmft::ClusterPattern& patt, bool isGroup) { + static std::array mIBSegmentations{0, 1, 2}; auto refRow = cl.getRow(); auto refCol = cl.getCol(); float xCOG = 0, zCOG = 0; @@ -169,9 +227,9 @@ math_utils::Point3D TopologyDictionary::getClusterCoordinates(const itsmft::C o2::itsmft::SegmentationAlpide::detectorToLocalUnchecked(refRow + xCOG, refCol + zCOG, locCl); } else { auto layer = its3::constants::detID::getDetID2Layer(cl.getSensorID()); - its3::SuperSegmentations[layer].detectorToLocalUnchecked(refRow + xCOG, refCol + zCOG, locCl); + mIBSegmentations[layer].detectorToLocalUnchecked(refRow + xCOG, refCol + zCOG, locCl); float xCurved{0.f}, yCurved{0.f}; - its3::SuperSegmentations[layer].flatToCurved(locCl.X(), locCl.Y(), xCurved, yCurved); + mIBSegmentations[layer].flatToCurved(locCl.X(), locCl.Y(), xCurved, yCurved); locCl.SetXYZ(xCurved, yCurved, locCl.Z()); } return locCl; diff --git a/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt b/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt index 17a8fd2748b87..2fad72a96426d 100644 --- a/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt @@ -14,6 +14,7 @@ o2_add_library(ITS3Simulation src/ITS3Services.cxx src/DescriptorInnerBarrelITS3.cxx src/Digitizer.cxx + src/DigiParams.cxx PUBLIC_LINK_LIBRARIES O2::SimulationDataFormat O2::ITSBase O2::ITSMFTSimulation ROOT::Physics) @@ -23,6 +24,7 @@ o2_target_root_dictionary(ITS3Simulation include/ITS3Simulation/ITS3Services.h include/ITS3Simulation/DescriptorInnerBarrelITS3.h include/ITS3Simulation/Digitizer.h + include/ITS3Simulation/DigiParams.h ) -o2_data_file(COPY data DESTINATION Detectors/ITS3/simulation) \ No newline at end of file 
+o2_data_file(COPY data DESTINATION Detectors/ITS3/simulation) diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DescriptorInnerBarrelITS3.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DescriptorInnerBarrelITS3.h index 80536a14d99c2..d1b54f81face4 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DescriptorInnerBarrelITS3.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DescriptorInnerBarrelITS3.h @@ -45,9 +45,9 @@ class DescriptorInnerBarrelITS3 : public o2::its::DescriptorInnerBarrel int mNumLayers{constants::nLayers}; // wrapper volume properties - double mWrapperMinRadiusITS3{1.8}; - double mWrapperMaxRadiusITS3{4.}; - double mWrapperZSpanITS3{20.}; + static constexpr double mWrapperMinRadiusITS3{1.8}; + static constexpr double mWrapperMaxRadiusITS3{4.}; + static constexpr double mWrapperZSpanITS3{constants::segment::length + 5.}; private: std::array, constants::nLayers> mIBLayers; diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h new file mode 100644 index 0000000000000..eca0a71949ba7 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h @@ -0,0 +1,45 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef ITS3_DIGIPARAMS_H +#define ITS3_DIGIPARAMS_H + +#include "ITSMFTSimulation/DigiParams.h" + +namespace o2::its3 +{ + +class DigiParams final : public o2::itsmft::DigiParams +{ + public: + const o2::itsmft::AlpideSimResponse* getAlpSimResponse() const = delete; + void setAlpSimResponse(const o2::itsmft::AlpideSimResponse* par) = delete; + + const o2::itsmft::AlpideSimResponse* getOBSimResponse() const { return mOBSimResponse; } + void setOBSimResponse(const o2::itsmft::AlpideSimResponse* response) { mOBSimResponse = response; } + + const o2::itsmft::AlpideSimResponse* getIBSimResponse() const { return mIBSimResponse; } + void setIBSimResponse(const o2::itsmft::AlpideSimResponse* response) { mIBSimResponse = response; } + + bool hasResponseFunctions() const { return mIBSimResponse != nullptr && mOBSimResponse != nullptr; } + + void print() const final; + + private: + const o2::itsmft::AlpideSimResponse* mOBSimResponse = nullptr; //!< pointer to external response + const o2::itsmft::AlpideSimResponse* mIBSimResponse = nullptr; //!< pointer to external response + + ClassDef(DigiParams, 1); +}; + +} // namespace o2::its3 + +#endif diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h index 7ece842b6f61f..8d0f06a27343b 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h @@ -18,15 +18,15 @@ #include #include -#include "Rtypes.h" // for Digitizer::Class -#include "TObject.h" // for TObject +#include "Rtypes.h" +#include "TObject.h" #include "ITSMFTSimulation/ChipDigitsContainer.h" #include "ITSMFTSimulation/AlpideSimResponse.h" -#include "ITSMFTSimulation/DigiParams.h" #include "ITSMFTSimulation/Hit.h" #include "ITSBase/GeometryTGeo.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" +#include 
"ITS3Simulation/DigiParams.h" #include "DataFormatsITSMFT/Digit.h" #include "DataFormatsITSMFT/ROFRecord.h" #include "CommonDataFormat/InteractionRecord.h" @@ -35,6 +35,7 @@ namespace o2::its3 { + class Digitizer : public TObject { using ExtraDig = std::vector; ///< container for extra contributions to PreDigits @@ -44,8 +45,8 @@ class Digitizer : public TObject void setMCLabels(o2::dataformats::MCTruthContainer* mclb) { mMCLabels = mclb; } void setROFRecords(std::vector* rec) { mROFRecords = rec; } - o2::itsmft::DigiParams& getParams() { return (o2::itsmft::DigiParams&)mParams; } - const o2::itsmft::DigiParams& getParams() const { return mParams; } + o2::its3::DigiParams& getParams() { return mParams; } + const o2::its3::DigiParams& getParams() const { return mParams; } void init(); @@ -62,9 +63,6 @@ class Digitizer : public TObject bool isContinuous() const { return mParams.isContinuous(); } void fillOutputContainer(uint32_t maxFrame = 0xffffffff); - void setDigiParams(const o2::itsmft::DigiParams& par) { mParams = par; } - const o2::itsmft::DigiParams& getDigitParams() const { return mParams; } - // provide the common itsmft::GeometryTGeo to access matrices and segmentation void setGeometry(const o2::its::GeometryTGeo* gm) { mGeometry = gm; } @@ -97,7 +95,7 @@ class Digitizer : public TObject static constexpr float sec2ns = 1e9; - o2::itsmft::DigiParams mParams; ///< digitization parameters + o2::its3::DigiParams mParams; ///< digitization parameters o2::InteractionTimeRecord mEventTime; ///< global event time and interaction record o2::InteractionRecord mIRFirstSampledTF; ///< IR of the 1st sampled IR, noise-only ROFs will be inserted till this IR only double mCollisionTimeWrtROF{}; @@ -108,7 +106,15 @@ class Digitizer : public TObject uint32_t mEventROFrameMin = 0xffffffff; ///< lowest RO frame for processed events (w/o automatic noise ROFs) uint32_t mEventROFrameMax = 0; ///< highest RO frame forfor processed events (w/o automatic noise ROFs) - 
o2::itsmft::AlpideSimResponse* mAlpSimResp = nullptr; // simulated response + static constexpr std::array mIBSegmentations{0, 1, 2}; + + o2::itsmft::AlpideSimResponse* mSimRespIB = nullptr; // simulated response for IB + o2::itsmft::AlpideSimResponse* mSimRespOB = nullptr; // simulated response for OB + bool mSimRespIBOrientation{false}; // whether the orientation in the IB response function is flipped + float mSimRespIBShift{0.f}; // adjusting the Y-shift in the IB response function to match sensor local coord. + float mSimRespIBScaleX{1.f}; // scale x-local coordinate to response function x-coordinate + float mSimRespIBScaleZ{1.f}; // scale z-local coordinate to response function z-coordinate + float mSimRespOBShift{0.f}; // adjusting the Y-shift in the OB response function to match sensor local coord. const o2::its::GeometryTGeo* mGeometry = nullptr; ///< ITS3 geometry @@ -121,8 +127,9 @@ class Digitizer : public TObject const o2::itsmft::NoiseMap* mDeadChanMap = nullptr; - ClassDef(Digitizer, 4); + ClassDef(Digitizer, 5); }; + } // namespace o2::its3 #endif /* ALICEO2_ITS3_DIGITIZER_H */ diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3Layer.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3Layer.h index 7543650e04a71..fd9195f9ee228 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3Layer.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3Layer.h @@ -20,13 +20,12 @@ #include #include -#include "Framework/Logger.h" -#include +#include "ITS3Base/SpecsV2.h" namespace o2::its3 { -/// This class defines the Geometry for the ITS3 using TGeo. +/// This class defines the geometry for the ITS3 IB layers. class ITS3Layer { // The hierarchy will be the following: @@ -45,8 +44,8 @@ class ITS3Layer kTile, kRSU, kSegment, - kCarbonForm, kChip, + kCarbonForm, kLayer, kAll, }; @@ -56,11 +55,10 @@ class ITS3Layer return mNames[static_cast((b == BuildLevel::kAll) ? 
BuildLevel::kLayer : b)]; } - explicit ITS3Layer(int layer = 0) : mNLayer(layer) - { - LOGP(debug, "Called on {} layer {}", layer, mNLayer); - init(); - } + explicit ITS3Layer(int layer = 0) : mNLayer(layer), + mR(o2::its3::constants::radii[mNLayer]), + mRmin(o2::its3::constants::radiiInner[mNLayer]), + mRmax(o2::its3::constants::radiiOuter[mNLayer]) {} explicit ITS3Layer(TGeoVolume* motherVolume, int layer = 0) : ITS3Layer(layer) { @@ -82,6 +80,7 @@ class ITS3Layer TGeoMedium* mSilicon{nullptr}; TGeoMedium* mAir{nullptr}; TGeoMedium* mCarbon{nullptr}; + TGeoMedium* mCopper{nullptr}; void getMaterials(bool create = false); TGeoMedium* getMaterial(const char* matName, bool create = false); @@ -97,10 +96,12 @@ class ITS3Layer uint8_t mNLayer{0}; // Layer number double mR{0}; // Middle Radius - double mRmin{}; // Minimum Radius + double mRmin{0}; // Minimum Radius double mRmax{0}; // Maximum Radius - // Individual Pieces + // Individual pieces + // since TGeo manages the resources itself one should not use these pointers + // after initialization anymore! 
TGeoVolume* mPixelArray{nullptr}; TGeoVolumeAssembly* mTile{nullptr}; TGeoVolumeAssembly* mRSU{nullptr}; @@ -109,7 +110,7 @@ class ITS3Layer TGeoVolumeAssembly* mCarbonForm{nullptr}; TGeoVolumeAssembly* mLayer{nullptr}; - ClassDef(ITS3Layer, 2); + ClassDef(ITS3Layer, 3); }; } // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/simulation/src/DescriptorInnerBarrelITS3.cxx b/Detectors/Upgrades/ITS3/simulation/src/DescriptorInnerBarrelITS3.cxx index 6d1bc621b5287..540e1d41f1c62 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/DescriptorInnerBarrelITS3.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/DescriptorInnerBarrelITS3.cxx @@ -18,14 +18,14 @@ ClassImp(DescriptorInnerBarrelITS3); void DescriptorInnerBarrelITS3::createLayer(int iLayer, TGeoVolume* dest) { - LOGP(info, "ITS3-IB: Creating Layer {}", iLayer); + LOGP(debug, "ITS3-IB: Creating Layer {}", iLayer); mIBLayers[iLayer] = std::make_unique(iLayer); mIBLayers[iLayer]->createLayer(dest); } void DescriptorInnerBarrelITS3::createServices(TGeoVolume* dest) { - LOGP(info, "ITS3-IB: Creating Services"); + LOGP(debug, "ITS3-IB: Creating Services"); mServices = std::make_unique(); mServices->createCYSSAssembly(dest); } diff --git a/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx b/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx new file mode 100644 index 0000000000000..a9f17a544b3c4 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx @@ -0,0 +1,40 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file DigiParams.cxx +/// \brief Implementation of the ITS3 digitization steering params + +#include "Framework/Logger.h" +#include "ITS3Simulation/DigiParams.h" + +ClassImp(o2::its3::DigiParams); + +namespace o2::its3 +{ + +void DigiParams::print() const +{ + // print settings + LOGF(info, "ITS3 DigiParams settings:"); + LOGF(info, "Continuous readout : %s", isContinuous() ? "ON" : "OFF"); + LOGF(info, "Readout Frame Length(ns) : %f", getROFrameLength()); + LOGF(info, "Strobe delay (ns) : %f", getStrobeDelay()); + LOGF(info, "Strobe length (ns) : %f", getStrobeLength()); + LOGF(info, "Threshold (N electrons) : %d", getChargeThreshold()); + LOGF(info, "Min N electrons to account : %d", getMinChargeToAccount()); + LOGF(info, "Number of charge sharing steps : %d", getNSimSteps()); + LOGF(info, "ELoss to N electrons factor : %e", getEnergyToNElectrons()); + LOGF(info, "Noise level per pixel : %e", getNoisePerPixel()); + LOGF(info, "Charge time-response:\n"); + getSignalShape().print(); +} + +} // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx b/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx index f1519c1d04063..3c75bf3e8f680 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx @@ -14,6 +14,7 @@ #include "ITSMFTBase/SegmentationAlpide.h" #include "ITS3Simulation/Digitizer.h" +#include "ITS3Base/ITS3Params.h" #include "MathUtils/Cartesian.h" #include "SimulationDataFormat/MCTruthContainer.h" #include "DetectorsRaw/HBFUtils.h" @@ -21,12 +22,12 @@ #include "Framework/Logger.h" #include +#include #include #include using o2::itsmft::Hit; -using Segmentation = o2::itsmft::SegmentationAlpide; -using SuperSegmentation = o2::its3::SegmentationSuperAlpide; 
+using SegmentationAlpide = o2::itsmft::SegmentationAlpide; using o2::itsmft::AlpideRespSimMat; using o2::itsmft::PreDigit; @@ -44,14 +45,44 @@ void Digitizer::init() } } - if (mParams.getAlpSimResponse() == nullptr) { - std::string responseFile = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; - LOGP(info, "Loading AlpideSimRespnse from file: {}", responseFile); - auto file = TFile::Open(responseFile.data()); - mAlpSimResp = (o2::itsmft::AlpideSimResponse*)file->Get("response0"); // We use by default the alpide response for Vbb=0V - mParams.setAlpSimResponse(mAlpSimResp); + if (!mParams.hasResponseFunctions()) { + auto loadSetResponseFunc = [&](const char* name, const char* fileIB, const char* nameIB, const char* fileOB, const char* nameOB) { + LOGP(info, "Loading response function for {}: IB={}:{} ; OB={}:{}", name, nameIB, fileIB, nameOB, fileOB); + auto fIB = TFile::Open(fileIB, "READ"); + if (!fIB || fIB->IsZombie() || !fIB->IsOpen()) { + LOGP(fatal, "Cannot open file {}", fileIB); + } + auto fOB = TFile::Open(fileOB, "READ"); + if (!fOB || fOB->IsZombie() || !fOB->IsOpen()) { + LOGP(fatal, "Cannot open file {}", fileOB); + } + mParams.setIBSimResponse(mSimRespIB = fIB->Get(nameIB)); + mParams.setOBSimResponse(mSimRespOB = fOB->Get(nameOB)); + fIB->Close(); + fOB->Close(); + }; + + if (const auto& func = ITS3Params::Instance().chipResponseFunction; func == "Alpide") { + constexpr const char* responseFile = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; + loadSetResponseFunc("Alpide", responseFile, "response0", responseFile, "response1"); + mSimRespIBShift = mSimRespIB->getDepthMax() - SegmentationMosaix::SensorLayerThickness / 2.f + 10.e-4f; + mSimRespOBShift = mSimRespOB->getDepthMax() - SegmentationAlpide::SensorLayerThickness / 2.f; + } else if (func == "APTS") { + constexpr const char* responseFileIB = 
"$(O2_ROOT)/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/APTSResponseData.root"; + constexpr const char* responseFileOB = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; + loadSetResponseFunc("APTS", responseFileIB, "response1", responseFileOB, "response1"); + mSimRespIBShift = mSimRespIB->getDepthMax() + (float)constants::pixelarray::pixels::apts::responseYShift; + mSimRespOBShift = mSimRespOB->getDepthMax() - SegmentationAlpide::SensorLayerThickness / 2.f; + mSimRespIBScaleX = 0.5f * constants::pixelarray::pixels::apts::pitchX / SegmentationMosaix::PitchRow; + mSimRespIBScaleZ = 0.5f * constants::pixelarray::pixels::apts::pitchZ / SegmentationMosaix::PitchCol; + mSimRespIBOrientation = true; + } else { + LOGP(fatal, "ResponseFunction '{}' not implemented!", func); + } } mParams.print(); + LOGP(info, "IBShift = {} ; OBShift = {}", mSimRespIBShift, mSimRespOBShift); + LOGP(info, "IB-Scale: X={} ; Z={}", mSimRespIBScaleX, mSimRespIBScaleZ); mIRFirstSampledTF = o2::raw::HBFUtils::Instance().getFirstSampledTFIR(); } @@ -143,7 +174,7 @@ void Digitizer::fillOutputContainer(uint32_t frameLast) for (size_t iChip{0}; iChip < mChips.size(); ++iChip) { auto& chip = mChips[iChip]; if (constants::detID::isDetITS3(iChip)) { // Check if this is a chip of ITS3 - chip.addNoise(mROFrameMin, mROFrameMin, &mParams, SuperSegmentation::mNRows, SuperSegmentation::mNCols); + chip.addNoise(mROFrameMin, mROFrameMin, &mParams, SegmentationMosaix::NRows, SegmentationMosaix::NCols); } else { chip.addNoise(mROFrameMin, mROFrameMin, &mParams); } @@ -238,8 +269,8 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID if (innerBarrel) { // transform the point on the curved surface to a flat one float xFlatE{0.f}, yFlatE{0.f}, xFlatS{0.f}, yFlatS{0.f}; - SuperSegmentations[layer].curvedToFlat(xyzLocS.X(), xyzLocS.Y(), xFlatS, yFlatS); - SuperSegmentations[layer].curvedToFlat(xyzLocE.X(), xyzLocE.Y(), xFlatE, yFlatE); + 
mIBSegmentations[layer].curvedToFlat(xyzLocS.X(), xyzLocS.Y(), xFlatS, yFlatS); + mIBSegmentations[layer].curvedToFlat(xyzLocE.X(), xyzLocE.Y(), xFlatE, yFlatE); // update the local coordinates with the flattened ones xyzLocS.SetXYZ(xFlatS, yFlatS, xyzLocS.Z()); xyzLocE.SetXYZ(xFlatE, yFlatE, xyzLocE.Z()); @@ -255,14 +286,14 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID int rowS = -1, colS = -1, rowE = -1, colE = -1, nSkip = 0; if (innerBarrel) { // get entrance pixel row and col - while (!SuperSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? + while (!mIBSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } xyzLocS += step; } // get exit pixel row and col - while (!SuperSegmentations[layer].localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? + while (!mIBSegmentations[layer].localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } @@ -270,14 +301,14 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } } else { // get entrance pixel row and col - while (!Segmentation::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? + while (!SegmentationAlpide::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } xyzLocS += step; } // get exit pixel row and col - while (!Segmentation::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? + while (!SegmentationAlpide::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? 
if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } @@ -294,23 +325,17 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } rowS -= AlpideRespSimMat::NPix / 2; rowE += AlpideRespSimMat::NPix / 2; - if (rowS < 0) { - rowS = 0; - } + rowS = std::max(rowS, 0); - int maxNrows{innerBarrel ? SuperSegmentation::mNRows : Segmentation::NRows}; - int maxNcols{innerBarrel ? SuperSegmentation::mNCols : Segmentation::NCols}; - if (rowE >= maxNrows) { - rowE = maxNrows - 1; - } + const int maxNrows{innerBarrel ? SegmentationMosaix::NRows : SegmentationAlpide::NRows}; + const int maxNcols{innerBarrel ? SegmentationMosaix::NCols : SegmentationAlpide::NCols}; + + rowE = std::min(rowE, maxNrows - 1); colS -= AlpideRespSimMat::NPix / 2; colE += AlpideRespSimMat::NPix / 2; - if (colS < 0) { - colS = 0; - } - if (colE >= maxNcols) { - colE = maxNcols - 1; - } + colS = std::max(colS, 0); + colE = std::min(colE, maxNcols - 1); + int rowSpan = rowE - rowS + 1, colSpan = colE - colS + 1; // size of plaquet where some response is expected float respMatrix[rowSpan][colSpan]; // response accumulated here std::fill(&respMatrix[0][0], &respMatrix[0][0] + rowSpan * colSpan, 0.f); @@ -327,22 +352,22 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID // take into account that the AlpideSimResponse depth defintion has different min/max boundaries // although the max should coincide with the surface of the epitaxial layer, which in the chip // local coordinates has Y = +SensorLayerThickness/2 - float thickness = innerBarrel ? SuperSegmentation::mSensorLayerThickness : Segmentation::SensorLayerThickness; - xyzLocS.SetY(xyzLocS.Y() + mAlpSimResp->getDepthMax() - thickness / 2.); + xyzLocS.SetY(xyzLocS.Y() + ((innerBarrel) ? 
mSimRespIBShift : mSimRespOBShift)); + // collect charge in evey pixel which might be affected by the hit for (int iStep = nSteps; iStep--;) { // Get the pixel ID if (innerBarrel) { - SuperSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); + mIBSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); } else { - Segmentation::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); + SegmentationAlpide::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); } if (row != rowPrev || col != colPrev) { // update pixel and coordinates of its center if (innerBarrel) { - if (!SuperSegmentations[layer].detectorToLocal(row, col, cRowPix, cColPix)) { + if (!mIBSegmentations[layer].detectorToLocal(row, col, cRowPix, cColPix)) { continue; } - } else if (!Segmentation::detectorToLocal(row, col, cRowPix, cColPix)) { + } else if (!SegmentationAlpide::detectorToLocal(row, col, cRowPix, cColPix)) { continue; // should not happen } rowPrev = row; @@ -350,9 +375,17 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } bool flipCol = false, flipRow = false; // note that response needs coordinates along column row (locX) (locZ) then depth (locY) - double rowMax{0.5f * (innerBarrel ? SuperSegmentation::mPitchRow : Segmentation::PitchRow)}; - double colMax{0.5f * (innerBarrel ? 
SuperSegmentation::mPitchCol : Segmentation::PitchCol)}; - auto rspmat = mAlpSimResp->getResponse(xyzLocS.X() - cRowPix, xyzLocS.Z() - cColPix, xyzLocS.Y(), flipRow, flipCol, rowMax, colMax); + float rowMax{}, colMax{}; + const AlpideRespSimMat* rspmat{nullptr}; + if (innerBarrel) { + rowMax = 0.5f * SegmentationMosaix::PitchRow; + colMax = 0.5f * SegmentationMosaix::PitchCol; + rspmat = mSimRespIB->getResponse(mSimRespIBScaleX * (xyzLocS.X() - cRowPix), mSimRespIBScaleZ * (xyzLocS.Z() - cColPix), xyzLocS.Y(), flipRow, flipCol, rowMax, colMax); + } else { + rowMax = 0.5f * SegmentationAlpide::PitchRow; + colMax = 0.5f * SegmentationAlpide::PitchCol; + rspmat = mSimRespOB->getResponse(xyzLocS.X() - cRowPix, xyzLocS.Z() - cColPix, xyzLocS.Y(), flipRow, flipCol, rowMax, colMax); + } xyzLocS += step; if (rspmat == nullptr) { @@ -369,7 +402,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID if (colDest < 0 || colDest >= colSpan) { continue; } - respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, flipRow, flipCol); + respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, ((innerBarrel && mSimRespIBOrientation) ? 
!flipRow : flipRow), flipCol); } } } diff --git a/Detectors/Upgrades/ITS3/simulation/src/ITS3Layer.cxx b/Detectors/Upgrades/ITS3/simulation/src/ITS3Layer.cxx index 26e47e03057c2..8dc94e339c793 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/ITS3Layer.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/ITS3Layer.cxx @@ -18,11 +18,11 @@ #include "TGeoVolume.h" #include "TGeoCompositeShape.h" +#include "Framework/Logger.h" #include "CommonConstants/MathConstants.h" #include "ITSBase/GeometryTGeo.h" #include "ITS3Base/SpecsV2.h" #include "ITS3Simulation/ITS3Layer.h" -#include "fairlogger/Logger.h" namespace o2m = o2::constants::math; namespace its3c = o2::its3::constants; @@ -31,13 +31,6 @@ namespace o2::its3 { using its3TGeo = o2::its::GeometryTGeo; -void ITS3Layer::init() -{ - mR = its3c::radii[mNLayer]; - mRmin = its3c::radiiInner[mNLayer]; - mRmax = its3c::radiiOuter[mNLayer]; -} - void ITS3Layer::getMaterials(bool create) { if (gGeoManager == nullptr) { @@ -47,6 +40,7 @@ void ITS3Layer::getMaterials(bool create) mSilicon = getMaterial("IT3_SI$", create); mAir = getMaterial("IT3_AIR$", create); mCarbon = getMaterial("IT3_CARBON$", create); + mCopper = getMaterial("IT3_COPPER$", create); } TGeoMedium* ITS3Layer::getMaterial(const char* matName, bool create) @@ -58,11 +52,11 @@ TGeoMedium* ITS3Layer::getMaterial(const char* matName, bool create) } else { // create dummy auto matDummy = gGeoManager->GetMaterial("MAT_DUMMY$"); if (matDummy == nullptr) { - LOGP(info, "Created Dummy material"); + LOGP(warn, "Created Dummy material"); matDummy = new TGeoMaterial("MAT_DUMMY$", 26.98, 13, 2.7); } mat = new TGeoMedium(matName, 1, matDummy); - LOGP(info, "Created medium {}", matName); + LOGP(warn, "Created medium {}", matName); } } return mat; @@ -75,12 +69,10 @@ void ITS3Layer::createLayer(TGeoVolume* motherVolume) createLayerImpl(); mBuilt = true; - LOGP(info, "ITS3-Layer: Created Layer {} with mR={} (minR={}, maxR={})", mNLayer, mR, mRmin, mRmax); if (motherVolume == 
nullptr) { return; } // Add it to motherVolume - LOGP(debug, " `-> Attaching to motherVolume '{}'", motherVolume->GetName()); auto* trans = new TGeoTranslation(0, 0, -constants::segment::lengthSensitive / 2.); motherVolume->AddNode(mLayer, 0, trans); } @@ -91,15 +83,9 @@ void ITS3Layer::createPixelArray() return; } // A pixel array is pure silicon and the sensitive part of our detector. - // It will be segmented into a 442x156 matrix by the - // SuperSegmentationAlpide. - // Pixel Array is just a longer version of the biasing but starts in phi at - // biasPhi2. using namespace its3c::pixelarray; - double pixelArrayPhi1 = constants::tile::readout::width / mR * o2m::Rad2Deg; - double pixelArrayPhi2 = width / mR * o2m::Rad2Deg + pixelArrayPhi1; - auto pixelArray = new TGeoTubeSeg(mRmin, mRmax, length / 2., - pixelArrayPhi1, pixelArrayPhi2); + double pixelArrayPhi = width / mR * o2m::Rad2Deg; + auto pixelArray = new TGeoTubeSeg(mRmin, mRmax, length / 2., 0, pixelArrayPhi); mPixelArray = new TGeoVolume(its3TGeo::getITS3PixelArrayPattern(mNLayer), pixelArray, mSilicon); mPixelArray->SetLineColor(color); mPixelArray->RegisterYourself(); @@ -131,8 +117,9 @@ void ITS3Layer::createTile() mTile->AddNode(readoutVol, 0, zMoveReadout); // Pixel Array is just a longer version of the biasing but starts in phi at - // biasPhi2. - mTile->AddNode(mPixelArray, 0); + // readoutPhi2. 
+ auto phiRotPixelArray = new TGeoRotation(Form("its3PhiPixelArrayOffset_%d", mNLayer), readoutPhi2, 0, 0); + mTile->AddNode(mPixelArray, 0, phiRotPixelArray); // Biasing double biasPhi1 = constants::pixelarray::width / mR * o2m::Rad2Deg + readoutPhi2; @@ -199,7 +186,7 @@ void ITS3Layer::createRSU() // Rotation for top half and vertical mirroring double phi = width / mR * o2m::Rad2Deg; - auto rot = new TGeoRotation("", 0, 0, -phi); + auto rot = new TGeoRotation(Form("its3RotHalfBarrel_%d", mNLayer), 0, 0, -phi); rot->ReflectY(true); // Upper Left @@ -276,11 +263,19 @@ void ITS3Layer::createChip() mChip = new TGeoVolumeAssembly(its3TGeo::getITS3ChipPattern(mNLayer)); mChip->VisibleDaughters(); + auto phiOffset = constants::segment::width / mR * o2m::Rad2Deg; for (unsigned int i{0}; i < constants::nSegments[mNLayer]; ++i) { - double phiOffset = constants::segment::width / mR * o2m::Rad2Deg; - auto rot = new TGeoRotation("", 0, 0, phiOffset * i); + auto rot = new TGeoRotation(Form("its3PhiSegmentOffset_%d_%d", mNLayer, i), 0, 0, phiOffset * i); mChip->AddNode(mSegment, i, rot); } + + // Add metal stack positioned radially outward + auto zMoveMetal = new TGeoTranslation(0, 0, constants::metalstack::length / 2. 
- constants::segment::lec::length); + auto metal = new TGeoTubeSeg(mRmax, mRmax + constants::metalstack::thickness, constants::metalstack::length / 2., 0, constants::nSegments[mNLayer] * phiOffset); + auto metalVol = new TGeoVolume(Form("metal%d", mNLayer), metal, mCopper); + metalVol->SetLineColor(constants::metalstack::color); + metalVol->RegisterYourself(); + mChip->AddNode(metalVol, 0, zMoveMetal); } void ITS3Layer::createCarbonForm() @@ -296,7 +291,7 @@ void ITS3Layer::createCarbonForm() mCarbonForm->VisibleDaughters(); double dRadius = -1; if (mNLayer < 2) { - dRadius = constants::radii[mNLayer + 1] - constants::radii[mNLayer] - constants::thickness; + dRadius = constants::radii[mNLayer + 1] - constants::radii[mNLayer] - constants::totalThickness; } else { dRadius = 0.7; // TODO: lack of carbon foam radius for layer 2, use 0.7mm as a temporary value } @@ -372,8 +367,8 @@ void ITS3Layer::createLayerImpl() // The offset is the right angle triangle of the middle radius with the // transverse axis. double phiOffset = std::asin(constants::equatorialGap / 2. 
/ mR) * o2m::Rad2Deg; - auto rotTop = new TGeoRotation("", 0, 0, +phiOffset); - auto rotBot = new TGeoRotation("", 0, 0, phiOffset + 180); + auto rotTop = new TGeoRotation(Form("its3CarbonPhiOffsetTop_%d", mNLayer), 0, 0, +phiOffset); + auto rotBot = new TGeoRotation(Form("its3CarbonPhiOffsetBot_%d", mNLayer), 0, 0, phiOffset + 180); mLayer->AddNode(mCarbonForm, 0, rotTop); mLayer->AddNode(mCarbonForm, 1, rotBot); @@ -412,8 +407,7 @@ void ITS3Layer::buildPartial(TGeoVolume* motherVolume, TGeoMatrix* mat, BuildLev case BuildLevel::kLayer: [[fallthrough]]; default: - createLayerImpl(); - motherVolume->AddNode(mLayer, 0, mat); + createLayer(motherVolume); } LOGP(info, "Partially built ITS3-{}-{}", mNLayer, getName(level)); } diff --git a/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h b/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h index b9af595018a34..fca3f5d63c2c4 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h +++ b/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h @@ -18,6 +18,7 @@ #pragma link C++ class o2::its3::ITS3Layer + ; #pragma link C++ class o2::its3::ITS3Services + ; #pragma link C++ class o2::its3::DescriptorInnerBarrelITS3 + ; +#pragma link C++ class o2::its3::DigiParams + ; #pragma link C++ class o2::its3::Digitizer + ; #endif From f0eebb64b1ef1ce4c1ebf79fa531ae06bf7049f5 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 12 Apr 2025 12:45:08 +0200 Subject: [PATCH 0223/1764] ITS3: remove unnecessary reader/writers (#14157) Signed-off-by: Felix Schlepper --- .../Upgrades/ITS3/workflow/CMakeLists.txt | 25 ---- .../include/ITS3Workflow/ClusterReaderSpec.h | 76 ----------- .../include/ITS3Workflow/ClusterWriterSpec.h | 31 ----- .../ITS3Workflow/ClusterWriterWorkflow.h | 31 ----- .../include/ITS3Workflow/TrackReaderSpec.h | 79 ------------ .../include/ITS3Workflow/TrackWriterSpec.h | 31 ----- .../include/ITS3Workflow/VertexReaderSpec.h | 65 ---------- 
.../ITS3/workflow/src/ClusterReaderSpec.cxx | 122 ------------------ .../ITS3/workflow/src/ClusterWriterSpec.cxx | 72 ----------- .../workflow/src/ClusterWriterWorkflow.cxx | 36 ------ .../ITS3/workflow/src/RecoWorkflow.cxx | 8 +- .../ITS3/workflow/src/TrackReaderSpec.cxx | 120 ----------------- .../ITS3/workflow/src/TrackWriterSpec.cxx | 82 ------------ .../ITS3/workflow/src/VertexReaderSpec.cxx | 84 ------------ 14 files changed, 4 insertions(+), 858 deletions(-) delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterReaderSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterWorkflow.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackReaderSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackWriterSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/VertexReaderSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/ClusterReaderSpec.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/ClusterWriterSpec.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/ClusterWriterWorkflow.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/TrackReaderSpec.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/TrackWriterSpec.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/VertexReaderSpec.cxx diff --git a/Detectors/Upgrades/ITS3/workflow/CMakeLists.txt b/Detectors/Upgrades/ITS3/workflow/CMakeLists.txt index 649e4d737d42c..bcb3cf46375e9 100644 --- a/Detectors/Upgrades/ITS3/workflow/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/workflow/CMakeLists.txt @@ -15,13 +15,8 @@ o2_add_library(ITS3Workflow SOURCES src/DigitReaderSpec.cxx src/DigitWriterSpec.cxx src/RecoWorkflow.cxx - src/ClusterWriterWorkflow.cxx src/ClustererSpec.cxx - src/ClusterWriterSpec.cxx src/TrackerSpec.cxx - 
src/TrackWriterSpec.cxx - src/TrackReaderSpec.cxx - src/VertexReaderSpec.cxx PUBLIC_LINK_LIBRARIES O2::Framework O2::SimConfig O2::DataFormatsITSMFT @@ -35,27 +30,7 @@ o2_add_library(ITS3Workflow O2::GPUTracking O2::ITSBase) -# o2_add_executable(digit-writer-workflow -# SOURCES src/digit-writer-workflow.cxx -# COMPONENT_NAME its3 -# PUBLIC_LINK_LIBRARIES O2::ITS3Workflow) - -# o2_add_executable(digit-reader-workflow -# SOURCES src/digit-reader-workflow.cxx -# COMPONENT_NAME its3 -# PUBLIC_LINK_LIBRARIES O2::ITS3Workflow) - o2_add_executable(reco-workflow SOURCES src/its3-reco-workflow.cxx COMPONENT_NAME its3 PUBLIC_LINK_LIBRARIES O2::ITS3Workflow) - -# o2_add_executable(cluster-writer-workflow -# SOURCES src/its-cluster-writer-workflow.cxx -# COMPONENT_NAME its -# PUBLIC_LINK_LIBRARIES O2::ITSWorkflow) - -# o2_add_executable(cluster-reader-workflow -# SOURCES src/its-cluster-reader-workflow.cxx -# COMPONENT_NAME its -# PUBLIC_LINK_LIBRARIES O2::ITSWorkflow) diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterReaderSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterReaderSpec.h deleted file mode 100644 index c411c2accace1..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterReaderSpec.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// @file ClusterReaderSpec.h - -#ifndef O2_ITSMFT_CLUSTERREADER -#define O2_ITSMFT_CLUSTERREADER - -#include "TFile.h" -#include "TTree.h" - -#include "Framework/DataProcessorSpec.h" -#include "Framework/Task.h" -#include "Headers/DataHeader.h" -#include "DataFormatsITSMFT/CompCluster.h" -#include "SimulationDataFormat/MCCompLabel.h" -#include "SimulationDataFormat/MCTruthContainer.h" -#include "DataFormatsITSMFT/ROFRecord.h" - -using namespace o2::framework; - -namespace o2::its3 -{ - -class ClusterReader : public Task -{ - public: - ClusterReader(bool useMC, bool usePatterns = true); - - void init(InitContext& ic) final; - void run(ProcessingContext& pc) final; - - protected: - void connectTree(const std::string& filename); - - std::vector mClusROFRec, *mClusROFRecPtr = &mClusROFRec; - std::vector mClusterCompArray, *mClusterCompArrayPtr = &mClusterCompArray; - std::vector mPatternsArray, *mPatternsArrayPtr = &mPatternsArray; - o2::dataformats::MCTruthContainer mClusterMCTruth, *mClusterMCTruthPtr = &mClusterMCTruth; - std::vector mClusMC2ROFs, *mClusMC2ROFsPtr = &mClusMC2ROFs; - - o2::header::DataOrigin mOrigin = o2::header::gDataOriginITS; - - std::unique_ptr mFile; - std::unique_ptr mTree; - - bool mUseMC = true; // use MC truth - bool mUsePatterns = true; // send patterns - - std::string mDetName = "ITS"; // pretending to be ITS - std::string mDetNameLC = "its"; - std::string mDetNameReal = "IT3"; - std::string mFileName = ""; - std::string mClusTreeName = "o2sim"; - std::string mClusROFBranchName = "ClustersROF"; - std::string mClusterPattBranchName = "ClusterPatt"; - std::string mClusterCompBranchName = "ClusterComp"; - std::string mClustMCTruthBranchName = "ClusterMCTruth"; - std::string mClustMC2ROFBranchName = "ClustersMC2ROF"; -}; - -/// create a processor spec -/// read ITS/MFT cluster data from a root file -framework::DataProcessorSpec getITS3ClusterReaderSpec(bool useMC = true, bool usePatterns = true); - -} // namespace o2::its3 - -#endif /* 
O2_ITSMFT_CLUSTERREADER */ diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterSpec.h deleted file mode 100644 index 49106871d89d5..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterSpec.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file ClusterWriterSpec.h - -#ifndef O2_ITS_CLUSTERWRITER -#define O2_ITS_CLUSTERWRITER - -#include "Framework/DataProcessorSpec.h" - -namespace o2 -{ -namespace its3 -{ - -/// create a processor spec -/// write ITS clusters to ROOT file -framework::DataProcessorSpec getClusterWriterSpec(bool useMC); - -} // namespace its3 -} // namespace o2 - -#endif /* O2_ITS_CLUSTERWRITER */ diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterWorkflow.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterWorkflow.h deleted file mode 100644 index 05268e7ca3a1e..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterWorkflow.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#ifndef O2_ITS_CLUSTER_WRITER_WORKFLOW_H -#define O2_ITS_CLUSTER_WRITER_WORKFLOW_H - -/// @file ClusterWriterWorkflow.h - -#include "Framework/WorkflowSpec.h" - -namespace o2 -{ -namespace its3 -{ - -namespace cluster_writer_workflow -{ -framework::WorkflowSpec getWorkflow(bool useMC); -} - -} // namespace its3 -} // namespace o2 -#endif diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackReaderSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackReaderSpec.h deleted file mode 100644 index 1686b7c275941..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackReaderSpec.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// @file TrackReaderSpec.h - -#ifndef O2_ITS3_TRACKREADER -#define O2_ITS3_TRACKREADER - -#include "TFile.h" -#include "TTree.h" - -#include "Framework/DataProcessorSpec.h" -#include "Framework/Task.h" -#include "Headers/DataHeader.h" -#include "DataFormatsITS/TrackITS.h" -#include "SimulationDataFormat/MCCompLabel.h" -#include "SimulationDataFormat/MCTruthContainer.h" -#include "DataFormatsITSMFT/ROFRecord.h" -#include "ReconstructionDataFormats/Vertex.h" - -namespace o2 -{ -namespace its3 -{ - -class TrackReader : public o2::framework::Task -{ - using Vertex = o2::dataformats::Vertex>; - - public: - TrackReader(bool useMC = true); - ~TrackReader() override = default; - void init(o2::framework::InitContext& ic) final; - void run(o2::framework::ProcessingContext& pc) final; - - protected: - void connectTree(const std::string& filename); - - std::vector mROFRec, *mROFRecInp = &mROFRec; - std::vector mVerticesROFRec, *mVerticesROFRecInp = &mVerticesROFRec; - std::vector mTracks, *mTracksInp = &mTracks; - std::vector mVertices, *mVerticesInp = &mVertices; - std::vector mClusInd, *mClusIndInp = &mClusInd; - std::vector mMCTruth, *mMCTruthInp = &mMCTruth; - std::vector mMCVertTruth, *mMCVTruthInp = &mMCTruth; - - o2::header::DataOrigin mOrigin = o2::header::gDataOriginITS; - - bool mUseMC = true; // use MC truth - - std::unique_ptr mFile; - std::unique_ptr mTree; - std::string mInputFileName = ""; - std::string mTrackTreeName = "o2sim"; - std::string mROFBranchName = "ITSTracksROF"; - std::string mTrackBranchName = "ITSTrack"; - std::string mClusIdxBranchName = "ITSTrackClusIdx"; - std::string mVertexBranchName = "Vertices"; - std::string mVertexROFBranchName = "VerticesROF"; - std::string mTrackMCTruthBranchName = "ITSTrackMCTruth"; - std::string mTrackMCVertTruthBranchName = "ITSVertexMCTruth"; -}; - -/// create a processor spec -/// read ITS track data from a root file -framework::DataProcessorSpec getITS3TrackReaderSpec(bool useMC = true); - -} // namespace its3 
-} // namespace o2 - -#endif /* O2_ITS3_TRACKREADER */ diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackWriterSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackWriterSpec.h deleted file mode 100644 index 32f704fd61b45..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackWriterSpec.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file TrackWriterSpec.h - -#ifndef O2_ITS3_TRACKWRITER -#define O2_ITS3_TRACKWRITER - -#include "Framework/DataProcessorSpec.h" - -namespace o2 -{ -namespace its3 -{ - -/// create a processor spec -/// write ITS tracks to ROOT file -o2::framework::DataProcessorSpec getTrackWriterSpec(bool useMC); - -} // namespace its3 -} // namespace o2 - -#endif /* O2_ITS_TRACKWRITER */ diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/VertexReaderSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/VertexReaderSpec.h deleted file mode 100644 index 6c723d07982f2..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/VertexReaderSpec.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file VertexReaderSpec.h - -#ifndef O2_ITS3_VERTEXREADER -#define O2_ITS3_VERTEXREADER - -#include "TFile.h" -#include "TTree.h" - -#include "Framework/DataProcessorSpec.h" -#include "Framework/Task.h" -#include "ReconstructionDataFormats/Vertex.h" -#include "DataFormatsITSMFT/ROFRecord.h" - -namespace o2 -{ -namespace its3 -{ -// read ITS vertices from the output tree of ITS tracking - -class VertexReader : public o2::framework::Task -{ - using Vertex = o2::dataformats::Vertex>; - - public: - VertexReader() = default; - ~VertexReader() override = default; - void init(o2::framework::InitContext& ic) final; - void run(o2::framework::ProcessingContext& pc) final; - - protected: - void connectTree(const std::string& filename); - void accumulate(); - - std::vector mVerticesROFRec, *mVerticesROFRecPtr = &mVerticesROFRec; - std::vector mVertices, *mVerticesPtr = &mVertices; - - o2::header::DataOrigin mOrigin = o2::header::gDataOriginITS; - - std::unique_ptr mFile; - std::unique_ptr mTree; - std::string mFileName = ""; - std::string mVertexTreeName = "o2sim"; - std::string mVertexBranchName = "Vertices"; - std::string mVertexROFBranchName = "VerticesROF"; -}; - -/// create a processor spec -/// read ITS vertex data from a root file -o2::framework::DataProcessorSpec getITS3VertexReaderSpec(); - -} // namespace its3 -} // namespace o2 - -#endif /* O2_ITS3_VERTEXREADER */ diff --git a/Detectors/Upgrades/ITS3/workflow/src/ClusterReaderSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/ClusterReaderSpec.cxx deleted file mode 100644 index 5030b1fcdd30a..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/ClusterReaderSpec.cxx +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
-// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file ClusterReaderSpec.cxx - -#include - -#include "TTree.h" - -#include "Framework/ControlService.h" -#include "Framework/ConfigParamRegistry.h" -#include "Framework/Logger.h" -#include "ITS3Workflow/ClusterReaderSpec.h" -#include -#include "CommonUtils/NameConf.h" - -using namespace o2::framework; -using namespace o2::itsmft; - -namespace o2 -{ -namespace its3 -{ - -ClusterReader::ClusterReader(bool useMC, bool usePatterns) -{ - mUseMC = useMC; - mUsePatterns = usePatterns; -} - -void ClusterReader::init(InitContext& ic) -{ - mFileName = o2::utils::concat_string(o2::base::NameConf::rectifyDirectory(ic.options().get("input-dir")), - ic.options().get((mDetNameLC + "-cluster-infile").c_str())); - connectTree(mFileName); -} - -void ClusterReader::run(ProcessingContext& pc) -{ - auto ent = mTree->GetReadEntry() + 1; - assert(ent < mTree->GetEntries()); // this should not happen - mTree->GetEntry(ent); - LOG(info) << mDetNameReal << "ClusterReader pushes " << mClusROFRec.size() << " ROFRecords," - << mClusterCompArray.size() << " compact clusters at entry " << ent; - - // This is a very ugly way of providing DataDescription, which anyway does not need to contain detector name. 
- // To be fixed once the names-definition class is ready - pc.outputs().snapshot(Output{mOrigin, "CLUSTERSROF", 0}, mClusROFRec); - pc.outputs().snapshot(Output{mOrigin, "COMPCLUSTERS", 0}, mClusterCompArray); - if (mUsePatterns) { - pc.outputs().snapshot(Output{mOrigin, "PATTERNS", 0}, mPatternsArray); - } - if (mUseMC) { - pc.outputs().snapshot(Output{mOrigin, "CLUSTERSMCTR", 0}, mClusterMCTruth); - pc.outputs().snapshot(Output{mOrigin, "CLUSTERSMC2ROF", 0}, mClusMC2ROFs); - } - - if (mTree->GetReadEntry() + 1 >= mTree->GetEntries()) { - pc.services().get().endOfStream(); - pc.services().get().readyToQuit(QuitRequest::Me); - } -} - -void ClusterReader::connectTree(const std::string& filename) -{ - mTree.reset(nullptr); // in case it was already loaded - mFile.reset(TFile::Open(filename.c_str())); - assert(mFile && !mFile->IsZombie()); - mTree.reset((TTree*)mFile->Get(mClusTreeName.c_str())); - assert(mTree); - - mTree->SetBranchAddress((mDetName + mClusROFBranchName).c_str(), &mClusROFRecPtr); - mTree->SetBranchAddress((mDetName + mClusterCompBranchName).c_str(), &mClusterCompArrayPtr); - if (mUsePatterns) { - mTree->SetBranchAddress((mDetName + mClusterPattBranchName).c_str(), &mPatternsArrayPtr); - } - if (mUseMC) { - if (mTree->GetBranch((mDetName + mClustMCTruthBranchName).c_str()) && - mTree->GetBranch((mDetName + mClustMC2ROFBranchName).c_str())) { - mTree->SetBranchAddress((mDetName + mClustMCTruthBranchName).c_str(), &mClusterMCTruthPtr); - mTree->SetBranchAddress((mDetName + mClustMC2ROFBranchName).c_str(), &mClusMC2ROFsPtr); - } else { - LOG(info) << "MC-truth is missing"; - mUseMC = false; - } - } - LOG(info) << "Loaded tree from " << filename << " with " << mTree->GetEntries() << " entries"; -} - -DataProcessorSpec getITS3ClusterReaderSpec(bool useMC, bool usePatterns) -{ - std::vector outputSpec; - outputSpec.emplace_back("IT3", "CLUSTERSROF", 0, Lifetime::Timeframe); - outputSpec.emplace_back("IT3", "COMPCLUSTERS", 0, Lifetime::Timeframe); - if 
(usePatterns) { - outputSpec.emplace_back("IT3", "PATTERNS", 0, Lifetime::Timeframe); - } - if (useMC) { - outputSpec.emplace_back("IT3", "CLUSTERSMCTR", 0, Lifetime::Timeframe); - outputSpec.emplace_back("IT3", "CLUSTERSMC2ROF", 0, Lifetime::Timeframe); - } - - return DataProcessorSpec{ - "its3-cluster-reader", - Inputs{}, - outputSpec, - AlgorithmSpec{adaptFromTask(useMC, usePatterns)}, - Options{ - {"its-cluster-infile", VariantType::String, "o2clus_its.root", {"Name of the input cluster file"}}, - {"input-dir", VariantType::String, "none", {"Input directory"}}}}; -} - -} // namespace its3 -} // namespace o2 diff --git a/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterSpec.cxx deleted file mode 100644 index 0231560b3ac25..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterSpec.cxx +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// @file ClusterWriterSpec.cxx - -#include - -#include "ITS3Workflow/ClusterWriterSpec.h" -#include "DPLUtils/MakeRootTreeWriterSpec.h" -#include "DataFormatsITSMFT/CompCluster.h" -#include "DataFormatsITSMFT/ROFRecord.h" -#include "SimulationDataFormat/MCCompLabel.h" -#include "SimulationDataFormat/MCTruthContainer.h" - -using namespace o2::framework; - -namespace o2 -{ -namespace its3 -{ - -template -using BranchDefinition = MakeRootTreeWriterSpec::BranchDefinition; -using CompClusType = std::vector; -using PatternsType = std::vector; -using ROFrameRType = std::vector; -using LabelsType = o2::dataformats::MCTruthContainer; -using ROFRecLblT = std::vector; -using namespace o2::header; - -DataProcessorSpec getClusterWriterSpec(bool useMC) -{ - // Spectators for logging - // this is only to restore the original behavior - auto compClustersSize = std::make_shared(0); - auto compClustersSizeGetter = [compClustersSize](CompClusType const& compClusters) { - *compClustersSize = compClusters.size(); - }; - auto logger = [compClustersSize](std::vector const& rofs) { - LOG(info) << "ITS3ClusterWriter pulled " << *compClustersSize << " clusters, in " << rofs.size() << " RO frames"; - }; - return MakeRootTreeWriterSpec("its3-cluster-writer", - "o2clus_its.root", - MakeRootTreeWriterSpec::TreeAttributes{"o2sim", "Tree with ITS clusters"}, - BranchDefinition{InputSpec{"compclus", "ITS", "COMPCLUSTERS", 0}, - "ITSClusterComp", - compClustersSizeGetter}, - BranchDefinition{InputSpec{"patterns", "ITS", "PATTERNS", 0}, - "ITSClusterPatt"}, - BranchDefinition{InputSpec{"ROframes", "ITS", "CLUSTERSROF", 0}, - "ITSClustersROF", - logger}, - BranchDefinition{InputSpec{"labels", "ITS", "CLUSTERSMCTR", 0}, - "ITSClusterMCTruth", - (useMC ? 1 : 0), // one branch if mc labels enabled - ""}, - BranchDefinition{InputSpec{"MC2ROframes", "ITS", "CLUSTERSMC2ROF", 0}, - "ITSClustersMC2ROF", - (useMC ? 
1 : 0), // one branch if mc labels enabled - ""})(); -} - -} // namespace its3 -} // namespace o2 diff --git a/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterWorkflow.cxx b/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterWorkflow.cxx deleted file mode 100644 index ae79b7797d57d..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterWorkflow.cxx +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file ClusterWriterWorkflow.cxx - -#include "ITS3Workflow/ClusterWriterWorkflow.h" -#include "ITS3Workflow/ClusterWriterSpec.h" - -namespace o2 -{ -namespace its3 -{ - -namespace cluster_writer_workflow -{ - -framework::WorkflowSpec getWorkflow(bool useMC) -{ - framework::WorkflowSpec specs; - - specs.emplace_back(getClusterWriterSpec(useMC)); - - return specs; -} - -} // namespace cluster_writer_workflow -} // namespace its3 -} // namespace o2 diff --git a/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx b/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx index 721ef36335631..21ae5b4a72345 100644 --- a/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx +++ b/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx @@ -11,9 +11,9 @@ #include "ITS3Workflow/RecoWorkflow.h" #include "ITS3Workflow/ClustererSpec.h" -#include "ITS3Workflow/ClusterWriterSpec.h" #include "ITS3Workflow/TrackerSpec.h" -#include "ITS3Workflow/TrackWriterSpec.h" +#include "ITSWorkflow/ClusterWriterSpec.h" +#include 
"ITSWorkflow/TrackWriterSpec.h" #include "ITS3Workflow/DigitReaderSpec.h" #include "Framework/Logger.h" @@ -34,14 +34,14 @@ framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::g } if (!disableRootOutput) { - specs.emplace_back(o2::its3::getClusterWriterSpec(useMC)); + specs.emplace_back(o2::its::getClusterWriterSpec(useMC)); } if (trmode != "off") { specs.emplace_back(o2::its3::getTrackerSpec(useMC, useGeom, useTrig, trmode, overrideBeamPosition, dtype)); if (!disableRootOutput) { - specs.emplace_back(o2::its3::getTrackWriterSpec(useMC)); + specs.emplace_back(o2::its::getTrackWriterSpec(useMC)); } } diff --git a/Detectors/Upgrades/ITS3/workflow/src/TrackReaderSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/TrackReaderSpec.cxx deleted file mode 100644 index 409fa69e7815b..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/TrackReaderSpec.cxx +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// @file TrackReaderSpec.cxx - -#include -#include -#include "Framework/ControlService.h" -#include "Framework/ConfigParamRegistry.h" -#include "ITS3Workflow/TrackReaderSpec.h" -#include "CommonUtils/NameConf.h" - -using namespace o2::framework; -using namespace o2::its3; - -namespace o2 -{ -namespace its3 -{ - -TrackReader::TrackReader(bool useMC) -{ - mUseMC = useMC; -} - -void TrackReader::init(InitContext& ic) -{ - mInputFileName = o2::utils::Str::concat_string(o2::utils::Str::rectifyDirectory(ic.options().get("input-dir")), - ic.options().get("its3-tracks-infile")); - connectTree(mInputFileName); -} - -void TrackReader::run(ProcessingContext& pc) -{ - auto ent = mTree->GetReadEntry() + 1; - assert(ent < mTree->GetEntries()); // this should not happen - mTree->GetEntry(ent); - LOG(info) << "Pushing " << mTracks.size() << " track in " << mROFRec.size() << " ROFs at entry " << ent; - pc.outputs().snapshot(Output{mOrigin, "ITSTrackROF", 0}, mROFRec); - pc.outputs().snapshot(Output{mOrigin, "TRACKS", 0}, mTracks); - pc.outputs().snapshot(Output{mOrigin, "TRACKCLSID", 0}, mClusInd); - pc.outputs().snapshot(Output{"ITS", "VERTICES", 0}, mVertices); - pc.outputs().snapshot(Output{"ITS", "VERTICESROF", 0}, mVerticesROFRec); - if (mUseMC) { - pc.outputs().snapshot(Output{mOrigin, "TRACKSMCTR", 0}, mMCTruth); - pc.outputs().snapshot(Output{mOrigin, "VERTICESMCTR", 0}, mMCVertTruth); - } - - if (mTree->GetReadEntry() + 1 >= mTree->GetEntries()) { - pc.services().get().endOfStream(); - pc.services().get().readyToQuit(QuitRequest::Me); - } -} - -void TrackReader::connectTree(const std::string& filename) -{ - mTree.reset(nullptr); // in case it was already loaded - mFile.reset(TFile::Open(filename.c_str())); - assert(mFile && !mFile->IsZombie()); - mTree.reset((TTree*)mFile->Get(mTrackTreeName.c_str())); - assert(mTree); - assert(mTree->GetBranch(mROFBranchName.c_str())); - - mTree->SetBranchAddress(mROFBranchName.c_str(), &mROFRecInp); - 
mTree->SetBranchAddress(mTrackBranchName.c_str(), &mTracksInp); - mTree->SetBranchAddress(mClusIdxBranchName.c_str(), &mClusIndInp); - if (!mTree->GetBranch(mVertexBranchName.c_str())) { - LOG(warning) << "No " << mVertexBranchName << " branch in " << mTrackTreeName << " -> vertices will be empty"; - } else { - mTree->SetBranchAddress(mVertexBranchName.c_str(), &mVerticesInp); - } - if (!mTree->GetBranch(mVertexROFBranchName.c_str())) { - LOG(warning) << "No " << mVertexROFBranchName << " branch in " << mTrackTreeName - << " -> vertices ROFrecords will be empty"; - } else { - mTree->SetBranchAddress(mVertexROFBranchName.c_str(), &mVerticesROFRecInp); - } - if (mUseMC) { - if (mTree->GetBranch(mTrackMCTruthBranchName.c_str())) { - mTree->SetBranchAddress(mTrackMCTruthBranchName.c_str(), &mMCTruthInp); - } else { - LOG(warning) << "MC-truth is missing, message will be empty"; - } - } - LOG(info) << "Loaded tree from " << filename << " with " << mTree->GetEntries() << " entries"; -} - -DataProcessorSpec getITS3TrackReaderSpec(bool useMC) -{ - std::vector outputSpec; - outputSpec.emplace_back("ITS", "ITSTrackROF", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "TRACKS", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "TRACKCLSID", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "VERTICES", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "VERTICESROF", 0, Lifetime::Timeframe); - if (useMC) { - outputSpec.emplace_back("ITS", "TRACKSMCTR", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "VERTICESMCTR", 0, Lifetime::Timeframe); - } - - return DataProcessorSpec{ - "its3-track-reader", - Inputs{}, - outputSpec, - AlgorithmSpec{adaptFromTask(useMC)}, - Options{ - {"its-tracks-infile", VariantType::String, "o2trac_its.root", {"Name of the input ITS3 track file"}}, - {"input-dir", VariantType::String, "none", {"Input directory"}}}}; -} - -} // namespace its3 -} // namespace o2 diff --git 
a/Detectors/Upgrades/ITS3/workflow/src/TrackWriterSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/TrackWriterSpec.cxx deleted file mode 100644 index 856c806e74247..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/TrackWriterSpec.cxx +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file TrackWriterSpec.cxx - -#include - -#include "ITS3Workflow/TrackWriterSpec.h" -#include "DPLUtils/MakeRootTreeWriterSpec.h" -#include "DataFormatsITS/TrackITS.h" -#include "DataFormatsITSMFT/ROFRecord.h" -#include "SimulationDataFormat/MCCompLabel.h" -#include "SimulationDataFormat/MCTruthContainer.h" -#include "ReconstructionDataFormats/Vertex.h" - -using namespace o2::framework; - -namespace o2 -{ -namespace its3 -{ -using Vertex = o2::dataformats::Vertex>; - -template -using BranchDefinition = MakeRootTreeWriterSpec::BranchDefinition; -using LabelsType = std::vector; -using ROFRecLblT = std::vector; -using namespace o2::header; - -DataProcessorSpec getTrackWriterSpec(bool useMC) -{ - // Spectators for logging - // this is only to restore the original behavior - auto tracksSize = std::make_shared(0); - auto tracksSizeGetter = [tracksSize](std::vector const& tracks) { - *tracksSize = tracks.size(); - }; - auto logger = [tracksSize](std::vector const& rofs) { - LOG(info) << "ITS3TrackWriter pulled " << *tracksSize << " tracks, in " << rofs.size() << " RO frames"; - }; - // NOTE: We name the branches as ITS and not IT3 to ensure matching works. 
- return MakeRootTreeWriterSpec("its3-track-writer", - "o2trac_its.root", - MakeRootTreeWriterSpec::TreeAttributes{"o2sim", "Tree with ITS3 tracks"}, - BranchDefinition>{InputSpec{"tracks", "ITS", "TRACKS", 0}, - "ITSTrack", - tracksSizeGetter}, - BranchDefinition>{InputSpec{"trackClIdx", "ITS", "TRACKCLSID", 0}, - "ITSTrackClusIdx"}, - BranchDefinition>{InputSpec{"vertices", "ITS", "VERTICES", 0}, - "Vertices"}, - BranchDefinition>{InputSpec{"vtxROF", "ITS", "VERTICESROF", 0}, - "VerticesROF"}, - BranchDefinition>{InputSpec{"ROframes", "ITS", "ITSTrackROF", 0}, - "ITSTracksROF", - logger}, - BranchDefinition{InputSpec{"labels", "ITS", "TRACKSMCTR", 0}, - "ITSTrackMCTruth", - (useMC ? 1 : 0), // one branch if mc labels enabled - ""}, - BranchDefinition{InputSpec{"labelsVertices", "ITS", "VERTICESMCTR", 0}, - "ITSVertexMCTruth", - (useMC ? 1 : 0), // one branch if mc labels enabled - ""}, - BranchDefinition>{InputSpec{"purityVertices", "ITS", "VERTICESMCPUR", 0}, - "ITSVertexMCPurity", (useMC ? 1 : 0), ""}, - BranchDefinition{InputSpec{"MC2ROframes", "ITS", "ITSTrackMC2ROF", 0}, - "ITSTracksMC2ROF", - (useMC ? 1 : 0), // one branch if mc labels enabled - ""})(); -} - -} // namespace its3 -} // namespace o2 diff --git a/Detectors/Upgrades/ITS3/workflow/src/VertexReaderSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/VertexReaderSpec.cxx deleted file mode 100644 index 25fd3e530161a..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/VertexReaderSpec.cxx +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file VertexReaderSpec.cxx - -#include - -#include "Framework/ControlService.h" -#include "Framework/ConfigParamRegistry.h" -#include "Framework/Logger.h" -#include "ITSWorkflow/VertexReaderSpec.h" -#include "CommonUtils/NameConf.h" - -using namespace o2::framework; -using namespace o2::its; - -namespace o2 -{ -namespace its -{ - -void VertexReader::init(InitContext& ic) -{ - mFileName = o2::utils::Str::concat_string(o2::utils::Str::rectifyDirectory(ic.options().get("input-dir")), - ic.options().get("its-vertex-infile")); - connectTree(mFileName); -} - -void VertexReader::run(ProcessingContext& pc) -{ - auto ent = mTree->GetReadEntry() + 1; - assert(ent < mTree->GetEntries()); // this should not happen - mTree->GetEntry(ent); - LOG(info) << "Pushing " << mVerticesPtr->size() << " vertices in " << mVerticesROFRecPtr->size() - << " ROFs at entry " << ent; - pc.outputs().snapshot(Output{"IT3", "VERTICES", 0}, mVertices); - pc.outputs().snapshot(Output{"IT3", "VERTICESROF", 0}, mVerticesROFRec); - - if (mTree->GetReadEntry() + 1 >= mTree->GetEntries()) { - pc.services().get().endOfStream(); - pc.services().get().readyToQuit(QuitRequest::Me); - } -} - -void VertexReader::connectTree(const std::string& filename) -{ - mTree.reset(nullptr); // in case it was already loaded - mFile.reset(TFile::Open(filename.c_str())); - assert(mFile && !mFile->IsZombie()); - mTree.reset((TTree*)mFile->Get(mVertexTreeName.c_str())); - assert(mTree); - assert(mTree->GetBranch(mVertexBranchName.c_str())); - assert(mTree->GetBranch(mVertexROFBranchName.c_str())); - mTree->SetBranchAddress(mVertexBranchName.c_str(), &mVerticesPtr); - mTree->SetBranchAddress(mVertexROFBranchName.c_str(), &mVerticesROFRecPtr); - LOG(info) << "Loaded tree from " << filename << " with " << 
mTree->GetEntries() << " entries"; -} - -DataProcessorSpec getITS3VertexReaderSpec() -{ - std::vector outputSpec; - outputSpec.emplace_back("IT3", "VERTICES", 0, Lifetime::Timeframe); - outputSpec.emplace_back("IT3", "VERTICESROF", 0, Lifetime::Timeframe); - - return DataProcessorSpec{ - "its3-vertex-reader", - Inputs{}, - outputSpec, - AlgorithmSpec{adaptFromTask()}, - Options{ - {"its3-vertex-infile", VariantType::String, "o2trac_its3.root", {"Name of the input ITS3 vertex file"}}, - {"input-dir", VariantType::String, "none", {"Input directory"}}}}; -} - -} // namespace its -} // namespace o2 From 7e212e1e87659b8f075ef961a0e70a11cbf104cc Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 14:02:18 +0200 Subject: [PATCH 0224/1764] jobutils: Don't treat bogus Geant message about exceptions as error --- Utilities/Tools/jobutils.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Utilities/Tools/jobutils.sh b/Utilities/Tools/jobutils.sh index 4d51ac8522c6d..54f1394197815 100644 --- a/Utilities/Tools/jobutils.sh +++ b/Utilities/Tools/jobutils.sh @@ -189,10 +189,12 @@ taskwrapper() { -e \"terminate called without an active\" \ -e \"\*\*\* Error in\"" # <--- LIBC fatal error messages - grepcommand="grep -a -H ${pattern} $logfile ${JOBUTILS_JOB_SUPERVISEDFILES} >> encountered_exceptions_list 2>/dev/null" + exclude_pattern="-e \"To change the tolerance or the exception severity\"" + + grepcommand="grep -a -H ${pattern} $logfile ${JOBUTILS_JOB_SUPERVISEDFILES} | grep -a -v ${exclude_pattern} >> encountered_exceptions_list 2>/dev/null" eval ${grepcommand} - grepcommand="grep -a -h --count ${pattern} $logfile ${JOBUTILS_JOB_SUPERVISEDFILES} 2>/dev/null" + grepcommand="cat encountered_exceptions_list 2>/dev/null | wc -l" # using eval here since otherwise the pattern is translated to a # a weirdly quoted stringlist RC=$(eval ${grepcommand}) From 7872ee5a85ac0762faa1dbb50da8dd0ddd904104 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 
Apr 2025 13:07:26 +0200 Subject: [PATCH 0225/1764] GPU CMake: cleanup and fix some todos --- GPU/GPUTracking/CMakeLists.txt | 43 ++++++++++++++-------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index e722d375e4b93..44a630fe19f48 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -230,31 +230,21 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") Interface/GPUO2InterfaceConfigurableParam.cxx) endif() +set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h + Base/GPUReconstructionKernelIncludes.template.h + Base/GPUReconstructionIncludesDeviceAll.template.h + cmake/GPUNoFastMathKernels.template.h + Definitions/GPUDefParameters.template.h + Definitions/GPUDefParametersLoad.template.inc) +set(GENERATED_HEADERS_LIST "") + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) -file(GENERATE # TODO: Do this as a list - OUTPUT include_gpu_onthefly/GPUReconstructionKernelList.h - INPUT Base/GPUReconstructionKernelList.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUReconstructionKernelIncludes.h - INPUT Base/GPUReconstructionKernelIncludes.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUReconstructionIncludesDeviceAll.h - INPUT Base/GPUReconstructionIncludesDeviceAll.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUNoFastMathKernels.h - INPUT cmake/GPUNoFastMathKernels.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUDefParameters.h - INPUT Definitions/GPUDefParameters.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUDefParametersLoad.inc - INPUT Definitions/GPUDefParametersLoad.template.inc -) +foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST}) + get_filename_component(OUTPUT_FILE_NAME ${TEMPLATE_FILE} NAME) + string(REPLACE ".template" "" OUTPUT_FILE_NAME ${OUTPUT_FILE_NAME}) + file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} 
INPUT ${TEMPLATE_FILE}) + list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME}) +endforeach() file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase CONTENT "$,REPLACE,[^A-Za-z0-9]+,_>,\n>" @@ -266,6 +256,7 @@ add_custom_command( VERBATIM COMMAND_EXPAND_LISTS ) +list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoad.inc ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) include(kernels.cmake) @@ -405,11 +396,11 @@ set_source_files_properties(Base/GPUReconstructionLibrary.cxx PROPERTIES INCLUDE_DIRECTORIES "${CMAKE_CURRENT_BINARY_DIR}") -# Make sure header files generated with add_custom_command are built +# Make sure header files generated with add_custom_command are built before being used target_sources(${targetName} PRIVATE FILE_SET "generatedHeaders" TYPE HEADERS - FILES ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h # TODO: build file list for this + FILES ${GENERATED_HEADERS_LIST} BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}) # Add compile definitions and libraries depending on available optional dependencies From e3b82a84be0aff2dea342a291b6e50e177ec6b90 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 Apr 2025 12:29:50 +0200 Subject: [PATCH 0226/1764] GPU: Fix typo in variable name, fix comments, fix debug messages --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 10 +++++----- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Global/GPUChainTracking.cxx | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 2 +- 5 files changed, 11 insertions(+), 11 
deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index d714c6833d18d..ed47358cc9d5c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -68,7 +68,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu int32_t nThreads = getNKernelHostThreads(false); if (nThreads > 1) { if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d Threads\n", nThreads); + printf("Running %d Threads\n", mThreading->activeThreads->max_concurrency()); } tbb::this_task_arena::isolate([&] { mThreading->activeThreads->execute([&] { @@ -91,10 +91,10 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu template <> inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - int32_t nnThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); - if (nnThreads > 1) { - tbb::parallel_for(0, nnThreads, [&](int iThread) { - size_t threadSize = size / nnThreads; + int32_t nThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); + if (nThreads > 1) { + tbb::parallel_for(0, nThreads, [&](int iThread) { + size_t threadSize = size / nThreads; if (threadSize % 4096) { threadSize += 4096 - threadSize % 4096; } diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index dd72119e3b56f..e17f1fcd7091e 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -77,8 +77,8 @@ add_custom_command( create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done - COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain standard headers 1>&2 && exit 1" - COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src" + COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain system headers 1>&2 && exit 1" + COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src for system headers" DEPENDS ${GPU_RTC_BIN}.src VERBATIM) add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 5a68df8ac9527..9a9b1e36a167c 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -115,8 +115,8 @@ add_custom_command( create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done - COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain standard headers 1>&2 && exit 1" - COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src" + COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain system headers 1>&2 && exit 1" + COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src for system headers" DEPENDS ${GPU_RTC_BIN}.src VERBATIM) add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 37ad164d20a60..6753db280d5bf 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -267,7 +267,7 @@ bool GPUChainTracking::ValidateSettings() return false; } if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 1 : 0) > (int32_t)mRec->NStreams()) { - GPUError("NStreams (%d) must be > nTPCClustererLanes (%d)", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); + GPUError("NStreams of %d insufficient for %d nTPCClustererLanes", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); return false; } if (GetProcessingSettings().noGPUMemoryRegistration && GetProcessingSettings().tpcCompressionGatherMode != 3) { diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 3c1ad9658566b..7faab410d20ea 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -76,7 +76,6 @@ function(o2_gpu_add_kernel kernel_name kernel_files) set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_NAMES "${kernel_name}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_INCLUDES "${TMP_KERNEL_CLASS_FILE}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_FILES "${TMP_KERNEL_CLASS_FILE}.cxx") - # 
add_custom_command OUTPUT option does not support target-dependend generator expressions, thus this workaround set(O2_GPU_KERNEL_TEMPLATE_FILES "GPUConstantMem.h") if (GPUCA_BUILD_DEBUG) @@ -102,6 +101,7 @@ function(o2_gpu_add_kernel kernel_name kernel_files) list(TRANSFORM O2_GPU_KERNEL_TEMPLATE_FILES PREPEND "#include \"") list(JOIN O2_GPU_KERNEL_TEMPLATE_FILES "\n" O2_GPU_KERNEL_TEMPLATE_FILES) + # add_custom_command OUTPUT option does not support target-dependend generator expressions, thus this workaround to create CUDA and HIP files string(REPLACE ", " "_" TMP_FILENAME "${kernel_name}") if(CUDA_ENABLED) set(TMP_FILENAMEA "${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${TMP_FILENAME}.cu") From 4c4e0044afb9a37b1117d7a773815a3abb86a306 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 Apr 2025 13:28:55 +0200 Subject: [PATCH 0227/1764] GPU: Clean up some unused parameters --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 10 +++++----- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 4 ++-- GPU/GPUTracking/Definitions/GPUDefParametersDefault.h | 7 ------- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 8 ++++---- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 9b6562d8e77ee..2f643706647ee 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -449,7 +449,7 @@ int32_t GPUReconstruction::Exit() if (mMemoryResources[i].mReuse >= 0) { continue; } - operator delete(mMemoryResources[i].mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(mMemoryResources[i].mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); mMemoryResources[i].mPtr = mMemoryResources[i].mPtrDevice = nullptr; } } @@ -577,7 +577,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || 
control->useInternal())) { if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { if (res->mPtrDevice && res->mReuse < 0) { - operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize); if (res->mReuse >= 0) { @@ -587,7 +587,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, } res->mPtrDevice = mMemoryResources[res->mReuse].mPtrDevice; } else { - res->mPtrDevice = operator new(res->mSize + GPUCA_BUFFER_ALIGNMENT GPUCA_OPERATOR_NEW_ALIGNMENT); + res->mPtrDevice = operator new(res->mSize + GPUCA_BUFFER_ALIGNMENT, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mPtr = GPUProcessor::alignPointer(res->mPtrDevice); res->SetPointers(res->mPtr); @@ -775,7 +775,7 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res) std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n"; } if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) { - operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mPtr = nullptr; res->mPtrDevice = nullptr; @@ -825,7 +825,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) for (uint32_t i = std::get<2>(mNonPersistentMemoryStack.back()); i < mNonPersistentIndividualAllocations.size(); i++) { GPUMemoryResource* res = mNonPersistentIndividualAllocations[i]; if (res->mReuse < 0) { - operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mPtr = nullptr; res->mPtrDevice = nullptr; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index ed47358cc9d5c..9b569d3e88f3c 
100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -202,7 +202,7 @@ int32_t GPUReconstructionCPU::InitDevice() if (mDeviceMemorySize > mHostMemorySize) { mHostMemorySize = mDeviceMemorySize; } - mHostMemoryBase = operator new(mHostMemorySize GPUCA_OPERATOR_NEW_ALIGNMENT); + mHostMemoryBase = operator new(mHostMemorySize, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } mHostMemoryPermanent = mHostMemoryBase; ClearAllocatedMemory(); @@ -218,7 +218,7 @@ int32_t GPUReconstructionCPU::ExitDevice() { if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { if (mMaster == nullptr) { - operator delete(mHostMemoryBase GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(mHostMemoryBase, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } mHostMemoryPool = mHostMemoryBase = mHostMemoryPoolEnd = mHostMemoryPermanent = nullptr; mHostMemorySize = 0; diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h index 4435e69c60ff6..44f3eb299d4c1 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h @@ -594,14 +594,11 @@ #define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters #define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets #define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit -#define GPUCA_TRACKER_CONSTANT_MEM ((size_t) 63 * 1024) // Amount of Constant Memory to reserve #define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device #define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host #define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread #define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread -#define 
GPUCA_MAX_SECTOR_NTRACK (2 << 24) // Maximum number of tracks per sector (limited by track id format) - // #define GPUCA_KERNEL_DEBUGGER_OUTPUT // Some assertions to make sure the parameters are not invalid @@ -614,14 +611,10 @@ // Derived parameters #ifdef GPUCA_USE_TEXTURES #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache - #define GPUCA_TEXTURE_FETCH_NEIGHBORS // Fetch also in Neighbours Finder #endif #if defined(GPUCA_SORT_STARTHITS_GPU) && defined(GPUCA_GPUCODE) #define GPUCA_SORT_STARTHITS #endif -#define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) -#define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT - // clang-format on #endif // GPUDEFPARAMETERSDEFAULT_H diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 2e3e4725bd6aa..b32db2bfebf11 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -71,7 +71,7 @@ GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline; GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline; void unique_ptr_aligned_delete(char* v) { - operator delete(v GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(v, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } std::unique_ptr outputmemory(nullptr, unique_ptr_aligned_delete), outputmemoryPipeline(nullptr, unique_ptr_aligned_delete), inputmemory(nullptr, unique_ptr_aligned_delete); std::unique_ptr eventDisplay; @@ -221,20 +221,20 @@ int32_t ReadConfiguration(int argc, char** argv) if (configStandalone.outputcontrolmem) { bool forceEmptyMemory = getenv("LD_PRELOAD") && strstr(getenv("LD_PRELOAD"), "valgrind") != nullptr; - outputmemory.reset((char*)operator new(configStandalone.outputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT)); + outputmemory.reset((char*)operator new(configStandalone.outputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT))); if (forceEmptyMemory) { printf("Valgrind 
detected, emptying GPU output memory to avoid false positive undefined reads"); memset(outputmemory.get(), 0, configStandalone.outputcontrolmem); } if (configStandalone.proc.doublePipeline) { - outputmemoryPipeline.reset((char*)operator new(configStandalone.outputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT)); + outputmemoryPipeline.reset((char*)operator new(configStandalone.outputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT))); if (forceEmptyMemory) { memset(outputmemoryPipeline.get(), 0, configStandalone.outputcontrolmem); } } } if (configStandalone.inputcontrolmem) { - inputmemory.reset((char*)operator new(configStandalone.inputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT)); + inputmemory.reset((char*)operator new(configStandalone.inputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT))); } configStandalone.proc.showOutputStat = true; From 4952cee09a99f788e070dc513d14e9ebdfb04165 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 8 Apr 2025 11:26:52 +0200 Subject: [PATCH 0228/1764] ITS3: GPU tracking Signed-off-by: Felix Schlepper --- .../include/ITS3Workflow/RecoWorkflow.h | 1 + .../ITS3/workflow/src/RecoWorkflow.cxx | 40 +++++++++++++++++-- .../ITS3/workflow/src/its3-reco-workflow.cxx | 4 +- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/RecoWorkflow.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/RecoWorkflow.h index bac8a128c5b39..1760aa1d850eb 100644 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/RecoWorkflow.h +++ b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/RecoWorkflow.h @@ -26,6 +26,7 @@ namespace o2::its3::reco_workflow framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::gpu::GPUDataTypes::DeviceType dtype, + bool useGPUWorkflow, bool upstreamDigits, bool upstreamClusters, bool disableRootOutput, diff --git a/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx b/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx 
index 21ae5b4a72345..947e53f80ddf1 100644 --- a/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx +++ b/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx @@ -15,12 +15,18 @@ #include "ITSWorkflow/ClusterWriterSpec.h" #include "ITSWorkflow/TrackWriterSpec.h" #include "ITS3Workflow/DigitReaderSpec.h" -#include "Framework/Logger.h" +#include "GPUWorkflow/GPUWorkflowSpec.h" +#include "Framework/CCDBParamSpec.h" + +// Dummy pointers +using CompletionPolicyData = std::vector; +static CompletionPolicyData gPolicyData; +static std::shared_ptr gTask; namespace o2::its3::reco_workflow { -framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::gpu::GPUDataTypes::DeviceType dtype, +framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::gpu::GPUDataTypes::DeviceType dtype, bool useGPUWorkflow, bool upstreamDigits, bool upstreamClusters, bool disableRootOutput, bool useGeom, int useTrig, bool overrideBeamPosition) { framework::WorkflowSpec specs; @@ -38,8 +44,36 @@ framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::g } if (trmode != "off") { - specs.emplace_back(o2::its3::getTrackerSpec(useMC, useGeom, useTrig, trmode, overrideBeamPosition, dtype)); + if (useGPUWorkflow) { + o2::gpu::GPURecoWorkflowSpec::Config cfg; + cfg.runITSTracking = true; + cfg.isITS3 = true; + cfg.itsTriggerType = useTrig; + cfg.itsOverrBeamEst = overrideBeamPosition; + cfg.processMC = useMC; + Inputs ggInputs; + auto ggRequest = std::make_shared(false, true, false, true, true, + useGeom ? 
o2::base::GRPGeomRequest::Aligned : o2::base::GRPGeomRequest::None, + ggInputs, true); + if (!useGeom) { + ggRequest->addInput({"itsTGeo", "ITS", "GEOMTGEO", 0, Lifetime::Condition, framework::ccdbParamSpec("ITS/Config/Geometry")}, ggInputs); + } + + auto task = std::make_shared(&gPolicyData, cfg, std::vector(), 0, ggRequest); + gTask = task; + Inputs taskInputs = task->inputs(); + Options taskOptions = task->options(); + std::move(ggInputs.begin(), ggInputs.end(), std::back_inserter(taskInputs)); + specs.emplace_back(DataProcessorSpec{ + "its3-gpu-tracker", + taskInputs, + task->outputs(), + AlgorithmSpec{adoptTask(task)}, + taskOptions}); + } else { + specs.emplace_back(o2::its3::getTrackerSpec(useMC, useGeom, useTrig, trmode, overrideBeamPosition, dtype)); + } if (!disableRootOutput) { specs.emplace_back(o2::its::getTrackWriterSpec(useMC)); } diff --git a/Detectors/Upgrades/ITS3/workflow/src/its3-reco-workflow.cxx b/Detectors/Upgrades/ITS3/workflow/src/its3-reco-workflow.cxx index b8aec2541d31f..2f0eda73742cb 100644 --- a/Detectors/Upgrades/ITS3/workflow/src/its3-reco-workflow.cxx +++ b/Detectors/Upgrades/ITS3/workflow/src/its3-reco-workflow.cxx @@ -48,6 +48,7 @@ void customize(std::vector& workflowOptions) {"tracking-mode", o2::framework::VariantType::String, "off", {"off,sync,async,cosmics"}}, {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}, {"use-full-geometry", o2::framework::VariantType::Bool, false, {"use full geometry instead of the light-weight IT3 part"}}, + {"use-gpu-workflow", o2::framework::VariantType::Bool, false, {"use GPU workflow (default: false)"}}, {"gpu-device", o2::framework::VariantType::Int, 1, {"use gpu device: CPU=1,CUDA=2,HIP=3 (default: CPU)"}}}; o2::raw::HBFUtilsInitializer::addConfigOption(options); std::swap(workflowOptions, options); @@ -67,6 +68,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) auto extClusters = configcontext.options().get("clusters-from-upstream"); 
auto disableRootOutput = configcontext.options().get("disable-root-output"); auto useGeom = configcontext.options().get("use-full-geometry"); + auto useGPUWfx = configcontext.options().get("use-gpu-workflow"); std::transform(trmode.begin(), trmode.end(), trmode.begin(), [](unsigned char c) { return std::tolower(c); }); o2::conf::ConfigurableParam::updateFromString(configcontext.options().get("configKeyValues")); @@ -80,7 +82,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) LOG(fatal) << "Unknown trigger type requested for events prescaling: " << selTrig; } } - auto wf = o2::its3::reco_workflow::getWorkflow(useMC, trmode, gpuDevice, extDigits, extClusters, disableRootOutput, useGeom, trType, beamPosOVerride); + auto wf = o2::its3::reco_workflow::getWorkflow(useMC, trmode, gpuDevice, useGPUWfx, extDigits, extClusters, disableRootOutput, useGeom, trType, beamPosOVerride); // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit o2::raw::HBFUtilsInitializer hbfIni(configcontext, wf); From fad8881705586ffaadde0f965c937e05d3fd766c Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 8 Apr 2025 11:26:15 +0200 Subject: [PATCH 0229/1764] GPU: Add ITS3 to workflow Signed-off-by: Felix Schlepper --- GPU/Workflow/CMakeLists.txt | 3 ++- .../include/GPUWorkflow/GPUWorkflowSpec.h | 1 + GPU/Workflow/src/GPUWorkflowITS.cxx | 16 ++++++++++++++++ GPU/Workflow/src/GPUWorkflowSpec.cxx | 10 +++++++--- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/GPU/Workflow/CMakeLists.txt b/GPU/Workflow/CMakeLists.txt index aa725168b9db2..db8d22dda1bae 100644 --- a/GPU/Workflow/CMakeLists.txt +++ b/GPU/Workflow/CMakeLists.txt @@ -27,7 +27,8 @@ o2_add_library(GPUWorkflow O2::DataFormatsGlobalTracking O2::DataFormatsTRD PRIVATE_LINK_LIBRARIES O2::GPUTracking - O2::ITSTrackingInterface) + O2::ITSTrackingInterface + $<$:O2::ITS3TrackingInterface>) o2_add_executable(reco-workflow COMPONENT_NAME gpu 
diff --git a/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h b/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h index eda3b28c6cff6..0038233f1c376 100644 --- a/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h +++ b/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h @@ -130,6 +130,7 @@ class GPURecoWorkflowSpec : public o2::framework::Task bool runITSTracking = false; bool itsOverrBeamEst = false; bool tpcTriggerHandling = false; + bool isITS3 = false; }; GPURecoWorkflowSpec(CompletionPolicyData* policyData, Config const& specconfig, std::vector const& tpcsectors, uint64_t tpcSectorMask, std::shared_ptr& ggr, std::function** gPolicyOrder = nullptr); diff --git a/GPU/Workflow/src/GPUWorkflowITS.cxx b/GPU/Workflow/src/GPUWorkflowITS.cxx index db9303c431ae7..e56958cba2c9b 100644 --- a/GPU/Workflow/src/GPUWorkflowITS.cxx +++ b/GPU/Workflow/src/GPUWorkflowITS.cxx @@ -20,6 +20,10 @@ #include "ITStracking/TrackingInterface.h" +#ifdef ENABLE_UPGRADES +#include "ITS3Reconstruction/TrackingInterface.h" +#endif + namespace o2::gpu { @@ -35,6 +39,18 @@ void GPURecoWorkflowSpec::initFunctionITS(o2::framework::InitContext& ic) { o2::its::VertexerTraits* vtxTraits = nullptr; o2::its::TrackerTraits* trkTraits = nullptr; +#ifdef ENABLE_UPGRADES + if (mSpecConfig.isITS3) { + mITSTrackingInterface = std::make_unique(mSpecConfig.processMC, + mSpecConfig.itsTriggerType, + mSpecConfig.itsOverrBeamEst); + } else +#endif + { + mITSTrackingInterface = std::make_unique(mSpecConfig.processMC, + mSpecConfig.itsTriggerType, + mSpecConfig.itsOverrBeamEst); + } mITSTrackingInterface = std::make_unique(mSpecConfig.processMC, mSpecConfig.itsTriggerType, mSpecConfig.itsOverrBeamEst); diff --git a/GPU/Workflow/src/GPUWorkflowSpec.cxx b/GPU/Workflow/src/GPUWorkflowSpec.cxx index aa4f3cfca1289..7ad03ec58ae80 100644 --- a/GPU/Workflow/src/GPUWorkflowSpec.cxx +++ b/GPU/Workflow/src/GPUWorkflowSpec.cxx @@ -1174,9 +1174,13 @@ Inputs GPURecoWorkflowSpec::inputs() } else if (mSpecConfig.itsTriggerType 
== 2) { inputs.emplace_back("phystrig", "TRD", "TRKTRGRD", 0, Lifetime::Timeframe); } - inputs.emplace_back("itscldict", "ITS", "CLUSDICT", 0, Lifetime::Condition, ccdbParamSpec("ITS/Calib/ClusterDictionary")); - inputs.emplace_back("itsalppar", "ITS", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); - + if (mSpecConfig.isITS3) { + inputs.emplace_back("cldict", "IT3", "CLUSDICT", 0, Lifetime::Condition, ccdbParamSpec("IT3/Calib/ClusterDictionary")); + inputs.emplace_back("alppar", "ITS", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); + } else { + inputs.emplace_back("itscldict", "ITS", "CLUSDICT", 0, Lifetime::Condition, ccdbParamSpec("ITS/Calib/ClusterDictionary")); + inputs.emplace_back("itsalppar", "ITS", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); + } if (mSpecConfig.itsOverrBeamEst) { inputs.emplace_back("meanvtx", "GLO", "MEANVERTEX", 0, Lifetime::Condition, ccdbParamSpec("GLO/Calib/MeanVertex", {}, 1)); } From 304938e708db28c837d8fc14a6248df3db3ce185 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Wed, 26 Feb 2025 15:41:21 +0100 Subject: [PATCH 0230/1764] ITS3: propagating split dict to external Signed-off-by: Felix Schlepper --- Detectors/GlobalTracking/src/MatchTPCITS.cxx | 8 ++++---- .../include/StrangenessTracking/StrangenessTracker.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Detectors/GlobalTracking/src/MatchTPCITS.cxx b/Detectors/GlobalTracking/src/MatchTPCITS.cxx index 403b7dbbb0e09..f689caed87351 100644 --- a/Detectors/GlobalTracking/src/MatchTPCITS.cxx +++ b/Detectors/GlobalTracking/src/MatchTPCITS.cxx @@ -671,7 +671,8 @@ bool MatchTPCITS::prepareITSData() auto pattID = clus.getPatternID(); unsigned int npix; #ifdef ENABLE_UPGRADES - if ((pattID == o2::itsmft::CompCluster::InvalidPatternID) || ((withITS3) ? 
mIT3Dict->isGroup(pattID) : mITSDict->isGroup(pattID))) { // braces guarantee evaluation order + auto ib = o2::its3::constants::detID::isDetITS3(clus.getChipID()); + if ((pattID == o2::itsmft::CompCluster::InvalidPatternID) || ((withITS3) ? mIT3Dict->isGroup(pattID, ib) : mITSDict->isGroup(pattID))) { // braces guarantee evaluation order #else if (pattID == o2::itsmft::CompCluster::InvalidPatternID || mITSDict->isGroup(pattID)) { #endif @@ -681,7 +682,7 @@ bool MatchTPCITS::prepareITSData() } else { #ifdef ENABLE_UPGRADES if (withITS3) { - npix = mIT3Dict->getNpixels(pattID); + npix = mIT3Dict->getNpixels(pattID, ib); } else { npix = mITSDict->getNpixels(pattID); } @@ -1444,8 +1445,7 @@ void MatchTPCITS::refitWinners(pmr::vector& matche #ifdef WITH_OPENMP #pragma omp parallel for schedule(dynamic) num_threads(mNThreads) \ - reduction(+ \ - : nFailedRefit) + reduction(+ : nFailedRefit) #endif for (int ifit = 0; ifit < nToFit; ifit++) { int iTPC = tpcToFit[ifit], iITS; diff --git a/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h b/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h index 9ca34e548f1c8..11feac64c59ae 100644 --- a/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h +++ b/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h @@ -279,20 +279,20 @@ class StrangenessTracker for (unsigned int iClus{0}; iClus < ITSclus.size(); ++iClus) { auto& clus = ITSclus[iClus]; auto pattID = clus.getPatternID(); + auto ib = o2::its3::constants::detID::isDetITS3(clus.getChipID()); int npix; o2::itsmft::ClusterPattern patt; - if (pattID == o2::itsmft::CompCluster::InvalidPatternID || mdict->isGroup(pattID)) { + if (pattID == o2::itsmft::CompCluster::InvalidPatternID || mdict->isGroup(pattID, ib)) { patt.acquirePattern(pattIt); npix = patt.getNPixels(); } else { - npix = mdict->getNpixels(pattID); - patt = 
mdict->getPattern(pattID); + npix = mdict->getNpixels(pattID, ib); + patt = mdict->getPattern(pattID, ib); } clusSizeVec[iClus] = npix; } - // LOG(info) << " Patt Npixel: " << pattVec[0].getNPixels(); } #endif From 9a0a760ec1f251b6728807901e50511d49ed6ab7 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Mon, 14 Apr 2025 10:54:36 +0200 Subject: [PATCH 0231/1764] ITS: CellSeed forced cast to int for chi2 (#14173) @mconcas @mpuccio is there a reason why there the `getChi2()` method casts the chi2 to int? Seems to me that this is a bit broken. --- Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h index fa0473ae88462..9ed5daed447df 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h @@ -97,7 +97,7 @@ class CellSeed final : public o2::track::TrackParCovF GPUhd() void setFirstTrackletIndex(int trkl) { mTracklets[0] = trkl; }; GPUhd() int getSecondTrackletIndex() const { return mTracklets[1]; }; GPUhd() void setSecondTrackletIndex(int trkl) { mTracklets[1] = trkl; }; - GPUhd() int getChi2() const { return mChi2; }; + GPUhd() float getChi2() const { return mChi2; }; GPUhd() void setChi2(float chi2) { mChi2 = chi2; }; GPUhd() int getLevel() const { return mLevel; }; GPUhd() void setLevel(int level) { mLevel = level; }; From ad25169b756bf60e0c46e3cc69ed1cb2ec28e50f Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sun, 13 Apr 2025 23:34:34 +0200 Subject: [PATCH 0232/1764] Fix decay It looks like the comment is correct and the PDG id is wrong. 
--- Steer/src/O2MCApplication.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Steer/src/O2MCApplication.cxx b/Steer/src/O2MCApplication.cxx index e1bba03e81c50..e44758241b8d2 100644 --- a/Steer/src/O2MCApplication.cxx +++ b/Steer/src/O2MCApplication.cxx @@ -887,7 +887,7 @@ void addSpecialParticles() mode4s[1][1] = 111; // pion0 bratio4s[2] = 40.; mode4s[2][0] = 1000010030; // tritium - mode4s[2][2] = 2212; // pion+ + mode4s[2][2] = 211; // pion+ mode4s[2][1] = 2112; // neutron TVirtualMC::GetMC()->SetDecayMode(1110020040, bratio4s, mode4s); From 0f9450ef8ec19b2dbd2c3b809f5cfcd716913880 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 14 Apr 2025 16:01:21 +0200 Subject: [PATCH 0233/1764] ONNX: Use CMake defines not env variables --- Common/ML/CMakeLists.txt | 16 ++++++++-------- Common/ML/src/OrtInterface.cxx | 32 +++++++++++++------------------- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/Common/ML/CMakeLists.txt b/Common/ML/CMakeLists.txt index 74be306c8b6a5..540fe8ebf271c 100644 --- a/Common/ML/CMakeLists.txt +++ b/Common/ML/CMakeLists.txt @@ -10,17 +10,17 @@ # or submit itself to any jurisdiction. 
# Pass ORT variables as a preprocessor definition -if(DEFINED ENV{ORT_ROCM_BUILD}) - add_compile_definitions(ORT_ROCM_BUILD=$ENV{ORT_ROCM_BUILD}) +if(ORT_ROCM_BUILD) + add_compile_definitions(ORT_ROCM_BUILD=1) endif() -if(DEFINED ENV{ORT_CUDA_BUILD}) - add_compile_definitions(ORT_CUDA_BUILD=$ENV{ORT_CUDA_BUILD}) +if(ORT_CUDA_BUILD) + add_compile_definitions(ORT_CUDA_BUILD=1) endif() -if(DEFINED ENV{ORT_MIGRAPHX_BUILD}) - add_compile_definitions(ORT_MIGRAPHX_BUILD=$ENV{ORT_MIGRAPHX_BUILD}) +if(ORT_MIGRAPHX_BUILD) + add_compile_definitions(ORT_MIGRAPHX_BUILD=1) endif() -if(DEFINED ENV{ORT_TENSORRT_BUILD}) - add_compile_definitions(ORT_TENSORRT_BUILD=$ENV{ORT_TENSORRT_BUILD}) +if(ORT_TENSORRT_BUILD) + add_compile_definitions(ORT_TENSORRT_BUILD=1) endif() o2_add_library(ML diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index fc784dd14d2dc..88f548bd4fe7b 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -59,29 +59,23 @@ void OrtModel::reset(std::unordered_map optionsMap) std::string dev_mem_str = "Hip"; #if defined(ORT_ROCM_BUILD) -#if ORT_ROCM_BUILD == 1 - if (device == "ROCM") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) ROCM execution provider set"; - } -#endif + if (device == "ROCM") { + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId)); + LOG(info) << "(ORT) ROCM execution provider set"; + } #endif #if defined(ORT_MIGRAPHX_BUILD) -#if ORT_MIGRAPHX_BUILD == 1 - if (device == "MIGRAPHX") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) MIGraphX execution provider set"; - } -#endif + if (device == "MIGRAPHX") { + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId)); + LOG(info) << "(ORT) MIGraphX execution provider set"; + } #endif #if 
defined(ORT_CUDA_BUILD) -#if ORT_CUDA_BUILD == 1 - if (device == "CUDA") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) CUDA execution provider set"; - dev_mem_str = "Cuda"; - } -#endif + if (device == "CUDA") { + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId)); + LOG(info) << "(ORT) CUDA execution provider set"; + dev_mem_str = "Cuda"; + } #endif if (allocateDeviceMemory) { From 013e1514fc94bc13564dfcf7bafd0c3f633f6a69 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 Apr 2025 15:10:20 +0200 Subject: [PATCH 0234/1764] Split GPUDefParameters file into constants, and RTC-dynamic parameters with a defaults file, and a wrapper! --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 2 +- GPU/GPUTracking/CMakeLists.txt | 8 +- GPU/GPUTracking/Definitions/GPUDef.h | 2 +- .../Definitions/GPUDefParametersConstants.h | 87 ++++++++++++ ...rsDefault.h => GPUDefParametersDefaults.h} | 132 ++---------------- .../GPUDefParametersLoad.template.inc | 2 +- ...e.h => GPUDefParametersRuntime.template.h} | 8 +- .../Definitions/GPUDefParametersWrapper.h | 66 +++++++++ .../Standalone/tools/dumpGPUDefParam.C | 7 +- 9 files changed, 182 insertions(+), 132 deletions(-) create mode 100644 GPU/GPUTracking/Definitions/GPUDefParametersConstants.h rename GPU/GPUTracking/Definitions/{GPUDefParametersDefault.h => GPUDefParametersDefaults.h} (81%) rename GPU/GPUTracking/Definitions/{GPUDefParameters.template.h => GPUDefParametersRuntime.template.h} (87%) create mode 100644 GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 9b569d3e88f3c..39507beda8a55 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -16,7 +16,7 @@ #include "GPUReconstructionIncludes.h" #include 
"GPUReconstructionThreading.h" #include "GPUChain.h" -#include "GPUDefParameters.h" +#include "GPUDefParametersRuntime.h" #include "GPUTPCClusterData.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 44a630fe19f48..f428d982394e0 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -129,7 +129,9 @@ set(HDRS_INSTALL DataTypes/GPUTriggerOutputs.h Debug/GPUROOTDump.h Definitions/GPUDefConstantsAndSettings.h - Definitions/GPUDefParametersDefault.h + Definitions/GPUDefParametersWrapper.h + Definitions/GPUDefParametersConstants.h + Definitions/GPUDefParametersDefaults.h Definitions/GPUDef.h Definitions/GPUDefMacros.h Definitions/GPULogging.h @@ -234,7 +236,7 @@ set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h Base/GPUReconstructionKernelIncludes.template.h Base/GPUReconstructionIncludesDeviceAll.template.h cmake/GPUNoFastMathKernels.template.h - Definitions/GPUDefParameters.template.h + Definitions/GPUDefParametersRuntime.template.h Definitions/GPUDefParametersLoad.template.inc) set(GENERATED_HEADERS_LIST "") @@ -258,7 +260,7 @@ add_custom_command( ) list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) -set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoad.inc ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) +set(HDRS_INSTALL ${HDRS_INSTALL} ${GENERATED_HEADERS_LIST}) include(kernels.cmake) # Optional sources depending on optional dependencies diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index 404f35f971c94..c77b9ce159306 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ 
b/GPU/GPUTracking/Definitions/GPUDef.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" #include "GPUDefConstantsAndSettings.h" -#include "GPUDefParametersDefault.h" +#include "GPUDefParametersWrapper.h" #include "GPUCommonRtypes.h" // Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h new file mode 100644 index 0000000000000..3a16d02ecf7c6 --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h @@ -0,0 +1,87 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDefParametersConstants.h +/// \author David Rohr + +// This file contains compile-time constants, independent from the backend + +#ifndef GPUDEFPARAMETERSCONSTANTS_H +#define GPUDEFPARAMETERSCONSTANTS_H +// clang-format off + +#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! 
+ +#if defined(__CUDACC__) || defined(__HIPCC__) + #define GPUCA_SPECIALIZE_THRUST_SORTS +#endif + +#define GPUCA_MAX_THREADS 1024 +#define GPUCA_MAX_STREAMS 36 + +#if defined(GPUCA_GPUCODE) + #define GPUCA_SORT_STARTHITS // Sort the start hits when running on GPU +#endif + +#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid +#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers +#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks + +// Default maximum numbers +#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters +#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets +#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit +#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device +#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host +#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread +#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Heap size on the GPU + +#ifdef GPUCA_GPUCODE + #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 + #endif + #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 + #endif + #ifndef GPUCA_ALTERNATE_BORDER_SORT + #define GPUCA_ALTERNATE_BORDER_SORT 0 + #endif + #ifndef GPUCA_SORT_BEFORE_FIT + #define GPUCA_SORT_BEFORE_FIT 0 + #endif + #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #endif + #ifndef GPUCA_COMP_GATHER_KERNEL + #define GPUCA_COMP_GATHER_KERNEL 0 + #endif + #ifndef GPUCA_COMP_GATHER_MODE + #define GPUCA_COMP_GATHER_MODE 2 + #endif +#else + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 + #define GPUCA_ALTERNATE_BORDER_SORT 0 + #define
GPUCA_SORT_BEFORE_FIT 0 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #define GPUCA_THREAD_COUNT_FINDER 1 + #define GPUCA_COMP_GATHER_KERNEL 0 + #define GPUCA_COMP_GATHER_MODE 0 +#endif +#ifndef GPUCA_DEDX_STORAGE_TYPE + #define GPUCA_DEDX_STORAGE_TYPE float +#endif +#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float +#endif + +// clang-format on +#endif // GPUDEFPARAMETERSCONSTANTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h similarity index 81% rename from GPU/GPUTracking/Definitions/GPUDefParametersDefault.h rename to GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 44f3eb299d4c1..360fb1ffb5269 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -9,28 +9,22 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUDefParametersDefault.h +/// \file GPUDefParametersDefaults.h /// \author David Rohr -// This files contains compile-time constants affecting the GPU performance. -// Many of these constants are GPU-architecture specific. -// This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc. -// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h +// This file contains compile-time constants affecting the GPU performance. 
-#ifndef GPUDEFPARAMETERSDEFAULT_H -#define GPUDEFPARAMETERSDEFAULT_H +#ifndef GPUDEFPARAMETERSDEFAULTS_H +#define GPUDEFPARAMETERSDEFAULTS_H // clang-format off -#include "GPUCommonDef.h" -#include "GPUDefMacros.h" - // Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds) // GPU Run Configuration #ifdef GPUCA_GPUCODE #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT 256 + #define GPUCA_THREAD_COUNT_DEFAULT 256 #define GPUCA_LB_GPUTPCCreateTrackingData 256 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 1024 @@ -93,7 +87,7 @@ #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_VEGA) #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT 256 + #define GPUCA_THREAD_COUNT_DEFAULT 256 #define GPUCA_LB_GPUTPCCreateTrackingData 128 #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 #define GPUCA_LB_GPUTPCStartHitsFinder 1024 @@ -156,7 +150,7 @@ #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT 512 + #define GPUCA_THREAD_COUNT_DEFAULT 512 #define GPUCA_LB_GPUTPCCreateTrackingData 384 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 512 @@ -219,7 +213,7 @@ #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT 512 + #define GPUCA_THREAD_COUNT_DEFAULT 512 #define GPUCA_LB_GPUTPCCreateTrackingData 256 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 512 @@ -281,8 +275,8 @@ #ifdef GPUCA_GPUCODE // Default settings for GPU, if not already set for selected GPU type - #ifndef GPUCA_THREAD_COUNT - #define GPUCA_THREAD_COUNT 256 + #ifndef GPUCA_THREAD_COUNT_DEFAULT + #define GPUCA_THREAD_COUNT_DEFAULT 256 #endif #ifndef GPUCA_LB_GPUTPCCreateTrackingData #define 
GPUCA_LB_GPUTPCCreateTrackingData 256 @@ -486,13 +480,11 @@ #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256 #endif #ifndef GPUCA_LB_GPUMemClean16 - #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT, 1 + #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT_DEFAULT, 1 #endif #ifndef GPUCA_LB_GPUitoa - #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT, 1 + #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT_DEFAULT, 1 #endif - #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) - // These kernel launch-bounds are derrived from one of the constants set above #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression #define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression @@ -516,105 +508,7 @@ #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER -#else - #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. -#endif - -#define GPUCA_GET_WARP_COUNT(...) (GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) - -#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! 
- -#if defined(__CUDACC__) || defined(__HIPCC__) - #define GPUCA_SPECIALIZE_THRUST_SORTS -#endif - -#ifndef GPUCA_NEIGHBORSFINDER_REGS - #define GPUCA_NEIGHBORSFINDER_REGS NONE, 0 -#endif -#ifdef GPUCA_GPUCODE - #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 - #endif - #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 - #endif - #ifndef GPUCA_ALTERNATE_BORDER_SORT - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #endif - #ifndef GPUCA_SORT_BEFORE_FIT - #define GPUCA_SORT_BEFORE_FIT 0 - #endif - #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #endif - #ifndef GPUCA_COMP_GATHER_KERNEL - #define GPUCA_COMP_GATHER_KERNEL 0 - #endif - #ifndef GPUCA_COMP_GATHER_MODE - #define GPUCA_COMP_GATHER_MODE 2 - #endif -#else - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #define GPUCA_SORT_BEFORE_FIT 0 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define GPUCA_THREAD_COUNT_FINDER 1 - #define GPUCA_COMP_GATHER_KERNEL 0 - #define GPUCA_COMP_GATHER_MODE 0 -#endif -#ifndef GPUCA_DEDX_STORAGE_TYPE - #define GPUCA_DEDX_STORAGE_TYPE float -#endif -#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float -#endif -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) -#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) - -#ifndef GPUCA_WARP_SIZE - #ifdef GPUCA_GPUCODE - #define GPUCA_WARP_SIZE 32 - #else - #define GPUCA_WARP_SIZE 1 - #endif -#endif - -#define GPUCA_MAX_THREADS 1024 -#define GPUCA_MAX_STREAMS 36 - -#define GPUCA_SORT_STARTHITS_GPU // Sort the start hits when running on GPU -#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid -#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers 
obtained from SetPointers -#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks - -// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling - -// Default maximum numbers -#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters -#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets -#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit -#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device -#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host -#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread -#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread - -// #define GPUCA_KERNEL_DEBUGGER_OUTPUT - -// Some assertions to make sure the parameters are not invalid -#if defined(GPUCA_GPUCODE) - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); - static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); - static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); -#endif - -// Derived parameters -#ifdef GPUCA_USE_TEXTURES - #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache -#endif -#if defined(GPUCA_SORT_STARTHITS_GPU) && defined(GPUCA_GPUCODE) - #define GPUCA_SORT_STARTHITS #endif // clang-format on -#endif // GPUDEFPARAMETERSDEFAULT_H +#endif // GPUDEFPARAMETERSDEFAULTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index c17244572ee0c..edec6530c1c5d 100644 --- 
a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -18,7 +18,7 @@ #define GPUCA_M_LB_EMPTY_1(...) __VA_ARGS__ #define GPUCA_M_LB_EMPTY0(...) GPUCA_M_CAT(GPUCA_M_LB_EMPTY_, __VA_OPT__(1))(__VA_ARGS__) -#include "GPUDefParameters.h" +#include "GPUDefParametersRuntime.h" #include "GPUDefMacros.h" #include #include diff --git a/GPU/GPUTracking/Definitions/GPUDefParameters.template.h b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h similarity index 87% rename from GPU/GPUTracking/Definitions/GPUDefParameters.template.h rename to GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h index 731cb76b89193..f3537c058a824 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParameters.template.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h @@ -9,11 +9,11 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUDefParameters.h +/// \file GPUDefParametersRuntime.h /// \author David Rohr -#ifndef GPUDEFPARAMETERS_H -#define GPUDEFPARAMETERS_H +#ifndef GPUDEFPARAMETERSRUNTIME_H +#define GPUDEFPARAMETERSRUNTIME_H namespace o2::gpu { @@ -24,4 +24,4 @@ struct GPUDefParameters { // clang-format off }; // clang-format on } // namespace o2::gpu -#endif +#endif // GPUDEFPARAMETERSRUNTIME_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h new file mode 100644 index 0000000000000..7f2bb271d18c8 --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -0,0 +1,66 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDefParametersWrapper.h +/// \author David Rohr + +// Wrapper file to load all compile-time parameters (architecture / rtc - dependent ones, and constant ones) +// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h + +#ifndef GPUDEFPARAMETERSWRAPPER_H +#define GPUDEFPARAMETERSWRAPPER_H +// clang-format off + +#include "GPUCommonDef.h" +#include "GPUDefMacros.h" + +#ifndef GPUCA_GPUCODE_GENRTC +#include "GPUDefParametersDefaults.h" +#endif +#include "GPUDefParametersConstants.h" + +#ifdef GPUCA_GPUCODE + #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) +#else + #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. +#endif + +#define GPUCA_GET_WARP_COUNT(...) 
(GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) + +#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) +#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) + +#ifndef GPUCA_WARP_SIZE + #ifdef GPUCA_GPUCODE + #define GPUCA_WARP_SIZE 32 + #else + #define GPUCA_WARP_SIZE 1 + #endif +#endif + +// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling + +// #define GPUCA_KERNEL_DEBUGGER_OUTPUT + +// Some assertions to make sure the parameters are not invalid +#if defined(GPUCA_GPUCODE) + static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); + static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); + static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); +#endif + +// Derived parameters +#ifdef GPUCA_USE_TEXTURES + #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache +#endif + +// clang-format on +#endif // GPUDEFPARAMETERSWRAPPER_H diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index 4a72b0cef31a3..785c049816252 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -16,14 +16,15 @@ // ROOT_INCLUDE_PATH="`pwd`/include" root -l -q -b src/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C'()' // Logic for testing to load the default parameters -/*#define GPUCA_GPUCODE +/* #define GPUCA_GPUCODE #define GPUCA_GPUTYPE_AMPERE #define GPUCA_MAXN 40 #define GPUCA_ROW_COUNT 152 #define GPUCA_TPC_COMP_CHUNK_SIZE 1024 -#include "GPUDefParametersDefault.h"*/ +#include "GPUDefParametersConstants.h" +#include 
"GPUDefParametersDefaults.h" */ -// Load file that sets GPUDefParameters +// Alternatively, logic to load file that sets GPUDefParameters #include "testParam.h" #include "GPUDefParametersLoad.inc" From 260d7d579864cb8ab91fd2d40278b52b66e7b2d8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 20:05:43 +0200 Subject: [PATCH 0235/1764] GPU CUDA/HIP: Set warp size automatically, not hard-coded --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 9 +++------ .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 3 ++- GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h | 3 +++ GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h | 8 -------- 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index f475929d49d50..47a9b675d27f6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -111,6 +111,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() constexpr int32_t reqVerMin = 0; #endif if (mProcessingSettings.rtc.enable && mProcessingSettings.rtctech.runTest == 2) { + mWarpSize = GPUCA_WARP_SIZE; genAndLoadRTC(); exit(0); } @@ -244,16 +245,12 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUInfo("\ttextureAlignment = %ld", (uint64_t)deviceProp.textureAlignment); GPUInfo(" "); } - if (deviceProp.warpSize != GPUCA_WARP_SIZE) { + if (deviceProp.warpSize != GPUCA_WARP_SIZE && !mProcessingSettings.rtc.enable) { throw std::runtime_error("Invalid warp size on GPU"); } + mWarpSize = deviceProp.warpSize; mBlockCount = deviceProp.multiProcessorCount; mMaxBackendThreads = std::max(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mBlockCount); -#ifndef __HIPCC__ // CUDA - mWarpSize = 32; -#else // HIP - mWarpSize = 64; -#endif mDeviceName = deviceProp.name; mDeviceName += " (CUDA GPU)"; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 5f481d2cb9058..abcd47ca01c90 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -73,7 +73,8 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } fclose(fp); } - const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true); + const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true) + + "#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n"; if (mProcessingSettings.rtctech.printLaunchBounds || mProcessingSettings.debugLevel >= 3) { GPUInfo("RTC Launch Bounds:\n%s", launchBounds.c_str()); } diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 360fb1ffb5269..396934e17da82 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -275,6 +275,9 @@ #ifdef GPUCA_GPUCODE // Default settings for GPU, if not already set for selected GPU type + #ifndef GPUCA_WARP_SIZE + #define GPUCA_WARP_SIZE 32 + #endif #ifndef GPUCA_THREAD_COUNT_DEFAULT #define GPUCA_THREAD_COUNT_DEFAULT 256 #endif diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index 7f2bb271d18c8..114c8d229b493 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -38,14 +38,6 @@ #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) #define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) -#ifndef GPUCA_WARP_SIZE - #ifdef GPUCA_GPUCODE - #define GPUCA_WARP_SIZE 32 - #else - #define GPUCA_WARP_SIZE 1 - #endif -#endif - // #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // 
Output Profiling Data for Tracklet Constructor Tracklet Scheduling // #define GPUCA_KERNEL_DEBUGGER_OUTPUT From a87ed76713dd648eecba500001afc682663ad46f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 20:54:30 +0200 Subject: [PATCH 0236/1764] GPU: Switch some preprocessor defines to constexpr --- .../Definitions/GPUDefParametersWrapper.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index 114c8d229b493..ce7889fe7fadd 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -27,14 +27,17 @@ #endif #include "GPUDefParametersConstants.h" -#ifdef GPUCA_GPUCODE - #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) +namespace o2::gpu +{ +#if defined(GPUCA_GPUCODE) + GPUhdi() static constexpr uint32_t GPUCA_GET_THREAD_COUNT(uint32_t val, ...) { return val; } + GPUhdi() static constexpr uint32_t GPUCA_GET_WARP_COUNT(uint32_t val, ...) { return val / GPUCA_WARP_SIZE; } #else - #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. + static constexpr uint32_t GPUCA_WARP_SIZE = 1; // On the host, a thread is a block is a warp, and we run 1 "device thread" per block. + #define GPUCA_GET_THREAD_COUNT(...) 1 // This must be a define not a constexpr function + #define GPUCA_GET_WARP_COUNT(...) 1 // since launch bound constants are not defined in host-code, and must evaluate to 1! #endif -#define GPUCA_GET_WARP_COUNT(...) 
(GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) #define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) @@ -46,7 +49,7 @@ #if defined(GPUCA_GPUCODE) static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); - static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); #endif // Derived parameters @@ -54,5 +57,7 @@ #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache #endif +} // namespace o2::gpu + // clang-format on #endif // GPUDEFPARAMETERSWRAPPER_H From 504bb9cb6102b535ff8f69e137b8d056cdbe043f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 20:55:00 +0200 Subject: [PATCH 0237/1764] GPU: Get rid of GPUCA_RTC_LB_..., use only GPUCA_LB_... 
--- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- .../Base/cuda/GPUReconstructionCUDA.h | 3 +- .../GPUReconstructionCUDAIncludesSystem.h | 6 ++-- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 11 ------- .../cuda/GPUReconstructionCUDARTCCalls.cu | 32 +++++++++++++++++++ GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +-- .../hip/GPUReconstructionHIPIncludesSystem.h | 6 ++-- .../Definitions/GPUDefParametersDefaults.h | 2 +- .../GPUDefParametersLoad.template.inc | 22 ++++++------- .../Definitions/GPUDefParametersWrapper.h | 4 +-- 10 files changed, 56 insertions(+), 36 deletions(-) create mode 100644 GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index e17f1fcd7091e..3655eaf66055e 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -17,7 +17,7 @@ if(DEFINED CUDA_COMPUTETARGET) endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") -set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) +set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDARTCCalls.cu) set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesSystem.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index ac5920f769f25..3441c6b9a4fd6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -45,8 +45,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); - void getRTCKernelCalls(std::vector& kernels); - template friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); GPUReconstructionCUDAInternals* mInternals; @@ -91,6 +89,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels& kernels); void genAndLoadRTC(); void loadKernelModules(bool perKernel); const char *mRtcSrcExtension = ".src", *mRtcBinExtension = ".o"; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h index 1cb3679fc30dc..3f072059a9ad7 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h @@ -12,8 +12,8 @@ /// \file GPUReconstructionCUDAIncludesSystem.h /// \author David Rohr -#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H -#define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H +#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H +#define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H #include #include @@ -32,4 +32,4 @@ #include #include -#endif +#endif // O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f8efd8428f035..cf08785e6b3d5 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -118,14 +118,3 @@ static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstS return retVal; }); #endif - -void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& kernels) -{ -#undef GPUCA_KRNL_LB -#undef __launch_bounds__ -#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__))); -#define GPUCA_KRNL_LB(x_class, x_attributes, ...) 
GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_RTC_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__) -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL -#undef GPUCA_KRNL_LB -} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu new file mode 100644 index 0000000000000..571428dc39e21 --- /dev/null +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu @@ -0,0 +1,32 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionCUDARTCCalls.cu +/// \author David Rohr + +#define GPUCA_GPUCODE_HOSTONLY +#define GPUCA_GPUCODE_NO_LAUNCH_BOUNDS + +#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args)) + +#include "GPUReconstructionCUDAIncludesSystem.h" +#include "GPUReconstructionCUDADef.h" +#include "GPUReconstructionCUDA.h" + +using namespace o2::gpu; + +void GPUReconstructionCUDA::getRTCKernelCalls(std::vector& kernels) +{ +#undef GPUCA_KRNL +#define GPUCA_KRNL(...) 
kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__))); +#undef __launch_bounds__ +#include "GPUReconstructionKernelList.h" +} diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 9a9b1e36a167c..55211e5ff10f6 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesSystem.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) @@ -61,7 +61,7 @@ else() get_filename_component(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ABSOLUTE) endif() -set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) +set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPRTCCalls.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) set(HDRS 
${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesSystem.h) diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h index cfe1121ef1089..1a3a1ff0108af 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h @@ -12,8 +12,8 @@ /// \file GPUReconstructionHIPIncludesSystem.h /// \author David Rohr -#ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDES_H -#define O2_GPU_RECONSTRUCTIONHIPINCLUDES_H +#ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H +#define O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H #include #include @@ -25,4 +25,4 @@ #include #pragma GCC diagnostic pop -#endif +#endif // O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 396934e17da82..406fb5e8048d6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -14,7 +14,7 @@ // This file contains compile-time constants affecting the GPU performance. -#ifndef GPUDEFPARAMETERSDEFAULTS_H +#if !defined(GPUDEFPARAMETERSDEFAULTS_H) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
#define GPUDEFPARAMETERSDEFAULTS_H // clang-format off diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index edec6530c1c5d..938cedbdacc93 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -37,17 +37,17 @@ static GPUDefParameters GPUDefParametersLoad() }; } -#define GPUCA_EXPORT_KERNEL(name) \ - if (par.par_LB_maxThreads[i] > 0) { \ - o << "#define GPUCA_" << (forRTC ? "RTC_" : "") << "LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ - if (par.par_LB_minBlocks[i] > 0) { \ - o << ", " << par.par_LB_minBlocks[i]; \ - } \ - if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ - o << ", " << par.par_LB_forceBlocks[i]; \ - } \ - o << "\n"; \ - } \ +#define GPUCA_EXPORT_KERNEL(name) \ + if (par.par_LB_maxThreads[i] > 0) { \ + o << "#define GPUCA_LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ + if (par.par_LB_minBlocks[i] > 0) { \ + o << ", " << par.par_LB_minBlocks[i]; \ + } \ + if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ + o << ", " << par.par_LB_forceBlocks[i]; \ + } \ + o << "\n"; \ + } \ i++; static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index ce7889fe7fadd..b61312b7b04be 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,14 +22,14 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#ifndef GPUCA_GPUCODE_GENRTC +#if defined(GPUCA_GPUCODE) #include "GPUDefParametersDefaults.h" #endif #include "GPUDefParametersConstants.h" namespace o2::gpu { -#if defined(GPUCA_GPUCODE) +#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) GPUhdi() static constexpr uint32_t GPUCA_GET_THREAD_COUNT(uint32_t val, 
...) { return val; } GPUhdi() static constexpr uint32_t GPUCA_GET_WARP_COUNT(uint32_t val, ...) { return val / GPUCA_WARP_SIZE; } #else From 731144f9b7ea49b5001e3c0acfca7edd2970b184 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 22:17:17 +0200 Subject: [PATCH 0238/1764] GPU HIP: Don't hipify cxx files --- GPU/GPUTracking/Base/hip/CMakeLists.txt | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 55211e5ff10f6..3a03a054d4a7e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -30,14 +30,24 @@ if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") foreach(file ${GPUCA_HIP_FILE_LIST}) get_filename_component(ABS_CUDA_SORUCE ../cuda/${file} ABSOLUTE) get_filename_component(CUDA_SOURCE ${file} NAME) + get_filename_component(CUDA_SOURCE_EXT ${file} EXT) string(REPLACE ".cu" ".hip" HIP_SOURCE1 ${CUDA_SOURCE}) string(REPLACE "CUDA" "HIP" HIP_SOURCE ${HIP_SOURCE1}) - add_custom_command( - OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} - COMMAND ${hip_HIPIFY_PERL_EXECUTABLE} --quiet-warnings ${ABS_CUDA_SORUCE} | sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} - DEPENDS ${ABS_CUDA_SORUCE} - COMMENT "Hippifying ${HIP_SOURCE}" - ) + if(CUDA_SOURCE_EXT STREQUAL ".cu" OR CUDA_SOURCE_EXT STREQUAL ".h") + add_custom_command( + OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} + COMMAND ${hip_HIPIFY_PERL_EXECUTABLE} --quiet-warnings ${ABS_CUDA_SORUCE} | sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} + DEPENDS ${ABS_CUDA_SORUCE} + COMMENT "Hippifying ${HIP_SOURCE}" + ) + else() + add_custom_command( + OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} + COMMAND sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' ${ABS_CUDA_SORUCE} > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} + DEPENDS ${ABS_CUDA_SORUCE} + COMMENT "Generating HIP source 
${HIP_SOURCE}" + ) + endif() list(APPEND HIP_SOURCES "${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}") endforeach() foreach(file ${GPUCA_HIP_LOCAL_FILE_LIST}) From 3812d54df627c334249c6ca29a4b7615694f7dbb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 22:31:00 +0200 Subject: [PATCH 0239/1764] GPU: Fix some global preprocessor defines that were used incorrectly --- GPU/Common/GPUCommonDefAPI.h | 2 +- .../GPUTPCCompressionKernels.cxx | 3 +++ .../DataCompression/GPUTPCCompressionKernels.h | 18 +++++++++++------- .../Definitions/GPUDefParametersDefaults.h | 2 -- .../Definitions/GPUDefParametersWrapper.h | 7 ------- .../SectorTracker/GPUTPCNeighboursFinder.h | 1 + .../SectorTracker/GPUTPCTrackletSelector.h | 1 + .../GPUTPCCFNoiseSuppression.h | 4 +++- .../GPUTPCCFStreamCompaction.h | 7 +++++++ .../GPUTPCNNClusterizerKernels.h | 8 +++++++- 10 files changed, 34 insertions(+), 19 deletions(-) diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h index f7efbf7e976d4..b029038a3b521 100644 --- a/GPU/Common/GPUCommonDefAPI.h +++ b/GPU/Common/GPUCommonDefAPI.h @@ -43,7 +43,7 @@ #define GPUhd() // Host and device function, inlined during GPU compilation to avoid symbol clashes in host code #define GPUhdi() inline // Host and device function, to-be-inlined on host and device #define GPUhdni() // Host and device function, not to-be-inlined automatically - #define GPUg() INVALID_TRIGGER_ERROR_NO_HOST_CODE // GPU kernel + #define GPUg() INVALID_TRIGGER_ERROR_NO_GPU_CODE // GPU kernel #define GPUshared() // shared memory variable declaration #define GPUglobal() // global memory variable declaration (only used for kernel input pointers) #define GPUconstant() // constant memory variable declaraion diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 445c03113cd39..5dbbf63ca8264 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ 
b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -258,6 +258,9 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->clusters[iSector][iRow])); } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h index b0bb8a6c12ecc..81817abf1e6d6 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h @@ -72,15 +72,19 @@ class GPUTPCCompressionGatherKernels : public GPUKernelTemplate using Vec64 = uint64_t; using Vec128 = uint4; - struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock)); + struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { union { - uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)]; - Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; - Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; - Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; + uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)]; + Vec32 
buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; + Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; + Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; struct { - uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; - uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; + uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; + uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; } unbuffered; }; diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 406fb5e8048d6..83ef7d8cf810b 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -492,14 +492,12 @@ #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression #define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression - #ifdef GPUCA_HAS_ONNX #define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels - #endif #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp 
GPUCA_THREAD_COUNT_SCAN diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index b61312b7b04be..beeefa4eb5f9d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -45,13 +45,6 @@ namespace o2::gpu // #define GPUCA_KERNEL_DEBUGGER_OUTPUT -// Some assertions to make sure the parameters are not invalid -#if defined(GPUCA_GPUCODE) - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); - static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); - static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); -#endif - // Derived parameters #ifdef GPUCA_USE_TEXTURES #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 41b5eb8a4ffb8..1bf5000cfbe5c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -41,6 +41,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number #if GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 + static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP); float mA1[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; float mA2[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; calink mB[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index 5009c672b030e..f487931bdaf4b 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -37,6 +37,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate int32_t mNTracklets; // n of tracklets int32_t mReserved; // for alignment reasons #if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE); GPUTPCHitId mHits[GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; #endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h index f5d8f533df651..71236bc317443 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h @@ -34,7 +34,9 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate noiseSuppression = 0, updatePeaks = 1, }; - static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression); + static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks)); + struct GPUSharedMemory { ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_NOISE_N]; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h index 25d3588be6d17..a72907fe55e89 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h @@ -37,6 +37,13 @@ class 
GPUTPCCFStreamCompaction : public GPUKernelTemplate struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { }; +#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart)); + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp)); + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop)); + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown)); + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits)); +#endif typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index e6c1dc508d6e4..a1d641fdb0b93 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -37,7 +37,13 @@ class MCLabelAccumulator; class GPUTPCNNClusterizerKernels : public GPUKernelTemplate { public: - static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels); + // Must all have same number of threads, since they use a common SCRATCH_PAD_WORK_GROUP_SIZE below + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels) == 
GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer); struct GPUSharedMemory { // Regular cluster finder ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; From da00550e828dad4617bc4730797d154e4bf79858 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 23:10:44 +0200 Subject: [PATCH 0240/1764] GPU: Temporarily move some defines back to the wrapper, to be cleaned up in another PR --- .../Definitions/GPUDefParametersDefaults.h | 40 --------------- .../Definitions/GPUDefParametersWrapper.h | 50 ++++++++++++++++++- 2 files changed, 49 insertions(+), 41 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 83ef7d8cf810b..ce703e2ceba4a 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -75,16 +75,6 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_VEGA) 
#define GPUCA_WARP_SIZE 64 #define GPUCA_THREAD_COUNT_DEFAULT 256 @@ -138,16 +128,6 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 512 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -201,16 +181,6 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 384 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -256,16 +226,6 @@ #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half // #define GPUCA_USE_TEXTURES #elif defined(GPUCA_GPUTYPE_OPENCL) #else diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index beeefa4eb5f9d..8d8815d8a8044 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,7 +22,55 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#if defined(GPUCA_GPUCODE) +#ifdef GPUCA_GPUCODE +#if defined(GPUCA_GPUTYPE_MI2xx) + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 +#elif defined(GPUCA_GPUTYPE_VEGA) + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 +#elif defined(GPUCA_GPUTYPE_AMPERE) + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 +#elif defined(GPUCA_GPUTYPE_TURING) + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 
+ #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half +#endif +#endif + +#ifdef GPUCA_GPUCODE #include "GPUDefParametersDefaults.h" #endif #include "GPUDefParametersConstants.h" From a946be89a02079eb7d8169b23137303cbbab8aa7 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Mon, 14 Apr 2025 11:17:23 +0200 Subject: [PATCH 0241/1764] GPU: remove automatic workaround for MI100 memory errors --- prodtests/full-system-test/dpl-workflow.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 2dfc74e3ecfb3..bb2712bedd92e 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -265,7 +265,6 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi - [[ $EPNSYNCMODE == 1 || ! 
-z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From 466ba06a25b9cebf15601a5db4a33b835fee170f Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 15 Apr 2025 13:21:21 +0200 Subject: [PATCH 0242/1764] DPL Analysis: introduce binned expression (#14174) --- .../include/Framework/ExpressionHelpers.h | 12 -- .../Core/include/Framework/Expressions.h | 131 +++++++++++++++++- Framework/Core/src/Expressions.cxx | 34 ++--- Framework/Core/test/test_Expressions.cxx | 27 +++- 4 files changed, 162 insertions(+), 42 deletions(-) diff --git a/Framework/Core/include/Framework/ExpressionHelpers.h b/Framework/Core/include/Framework/ExpressionHelpers.h index b531a39519272..f881abf7b0e6c 100644 --- a/Framework/Core/include/Framework/ExpressionHelpers.h +++ b/Framework/Core/include/Framework/ExpressionHelpers.h @@ -75,18 +75,6 @@ struct ColumnOperationSpec { result.type = type; } }; - -/// helper struct used to parse trees -struct NodeRecord { - /// pointer to the actual tree node - Node* node_ptr = nullptr; - size_t index = 0; - explicit NodeRecord(Node* node_, size_t index_) : node_ptr(node_), index{index_} {} - bool operator!=(NodeRecord const& rhs) - { - return this->node_ptr != rhs.node_ptr; - } -}; } // namespace o2::framework::expressions #endif // O2_FRAMEWORK_EXPRESSIONS_HELPERS_H_ diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index 1d2883418de71..af89e56f85835 100644 --- a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -41,6 +41,7 @@ class Projector; #include #include #include +#include namespace gandiva { using Selection = std::shared_ptr; @@ -114,6 +115,8 @@ struct LiteralNode { { } + LiteralNode(LiteralNode const& other) = default; + using var_t = LiteralValue::stored_type; var_t 
value; atype::type type = atype::NA; @@ -132,6 +135,7 @@ struct BindingNode { /// An expression tree node corresponding to binary or unary operation struct OpNode { OpNode(BasicOp op_) : op{op_} {} + OpNode(OpNode const& other) = default; BasicOp op; }; @@ -147,6 +151,8 @@ struct PlaceholderNode : LiteralNode { } } + PlaceholderNode(PlaceholderNode const& other) = default; + void reset(InitContext& context) { value = retrieve(context, name.data()); @@ -156,6 +162,28 @@ struct PlaceholderNode : LiteralNode { LiteralNode::var_t (*retrieve)(InitContext&, char const*); }; +/// A placeholder node for parameters taken from an array +struct ParameterNode : LiteralNode { + ParameterNode(int index_ = -1) + : LiteralNode((float)0), + index{index_} + { + } + + ParameterNode(ParameterNode const&) = default; + + template + void reset(T value_, int index_ = -1) + { + (*static_cast(this)) = LiteralNode(value_); + if (index_ > 0) { + index = index_; + } + } + + int index; +}; + /// A conditional node struct ConditionalNode { }; @@ -178,6 +206,10 @@ struct Node { { } + Node(ParameterNode&& p) : self{std::forward(p)}, left{nullptr}, right{nullptr}, condition{nullptr} + { + } + Node(ConditionalNode op, Node&& then_, Node&& else_, Node&& condition_) : self{op}, left{std::make_unique(std::forward(then_))}, @@ -196,16 +228,70 @@ struct Node { right{nullptr}, condition{nullptr} {} + Node(Node const& other) + : self{other.self}, + index{other.index} + { + if (other.left != nullptr) { + left = std::make_unique(*other.left); + } + if (other.right != nullptr) { + right = std::make_unique(*other.right); + } + if (other.condition != nullptr) { + condition = std::make_unique(*other.condition); + } + } + /// variant with possible nodes - using self_t = std::variant; + using self_t = std::variant; self_t self; size_t index = 0; /// pointers to children - std::unique_ptr left; - std::unique_ptr right; - std::unique_ptr condition; + std::unique_ptr left = nullptr; + std::unique_ptr right = nullptr; 
+ std::unique_ptr condition = nullptr; +}; + +/// helper struct used to parse trees +struct NodeRecord { + /// pointer to the actual tree node + Node* node_ptr = nullptr; + size_t index = 0; + explicit NodeRecord(Node* node_, size_t index_) : node_ptr(node_), index{index_} {} + bool operator!=(NodeRecord const& rhs) + { + return this->node_ptr != rhs.node_ptr; + } }; +/// Tree-walker helper +template +void walk(Node* head, L const& pred) +{ + std::stack path; + path.emplace(head, 0); + while (!path.empty()) { + auto& top = path.top(); + pred(top.node_ptr); + + auto* leftp = top.node_ptr->left.get(); + auto* rightp = top.node_ptr->right.get(); + auto* condp = top.node_ptr->condition.get(); + path.pop(); + + if (leftp != nullptr) { + path.emplace(leftp, 0); + } + if (rightp != nullptr) { + path.emplace(rightp, 0); + } + if (condp != nullptr) { + path.emplace(condp, 0); + } + } +} + /// overloaded operators to build the tree from an expression #define BINARY_OP_NODES(_operator_, _operation_) \ @@ -402,6 +488,43 @@ inline Node ifnode(Node&& condition_, Configurable const& then_, Configurabl return Node{ConditionalNode{}, PlaceholderNode{then_}, PlaceholderNode{else_}, std::forward(condition_)}; } +/// Parameters +inline Node par(int index) +{ + return Node{ParameterNode{index}}; +} + +/// binned functional +template +inline Node binned(std::vector const& binning, std::vector const& parameters, Node&& binned, Node&& pexp, Node&& out) +{ + int bins = binning.size() - 1; + const auto binned_copy = binned; + const auto out_copy = out; + auto root = ifnode(Node{binned_copy} < binning[0], Node{out_copy}, LiteralNode{-1}); + auto* current = &root; + for (auto i = 0; i < bins; ++i) { + current->right = std::make_unique(ifnode(Node{binned_copy} < binning[i + 1], updateParameters(pexp, bins, parameters, i), LiteralNode{-1})); + current = current->right.get(); + } + current->right = std::make_unique(out); + return root; +} + +template +Node updateParameters(Node const& pexp, int 
bins, std::vector const& parameters, int bin) +{ + Node result{pexp}; + auto updateParameter = [&bins, &parameters, &bin](Node* node) { + if (node->self.index() == 5) { + auto* n = std::get_if<5>(&node->self); + n->reset(parameters[n->index * bins + bin]); + } + }; + walk(&result, updateParameter); + return result; +} + /// A struct, containing the root of the expression tree struct Filter { Filter() = default; diff --git a/Framework/Core/src/Expressions.cxx b/Framework/Core/src/Expressions.cxx index 45bb120b6eb74..6f646515b7837 100644 --- a/Framework/Core/src/Expressions.cxx +++ b/Framework/Core/src/Expressions.cxx @@ -118,6 +118,13 @@ struct PlaceholderNodeHelper { return DatumSpec{node.value, node.type}; } }; + +struct ParameterNodeHelper { + DatumSpec operator()(ParameterNode const& node) const + { + return DatumSpec{node.value, node.type}; + } +}; } // namespace std::shared_ptr concreteArrowType(atype::type type) @@ -189,37 +196,13 @@ std::ostream& operator<<(std::ostream& os, DatumSpec const& spec) void updatePlaceholders(Filter& filter, InitContext& context) { - std::stack path; - - // insert the top node into stack - path.emplace(filter.node.get(), 0); - auto updateNode = [&](Node* node) { if (node->self.index() == 3) { std::get_if<3>(&node->self)->reset(context); } }; - // while the stack is not empty - while (!path.empty()) { - auto& top = path.top(); - updateNode(top.node_ptr); - - auto* leftp = top.node_ptr->left.get(); - auto* rightp = top.node_ptr->right.get(); - auto* condp = top.node_ptr->condition.get(); - path.pop(); - - if (leftp != nullptr) { - path.emplace(leftp, 0); - } - if (rightp != nullptr) { - path.emplace(rightp, 0); - } - if (condp != nullptr) { - path.emplace(condp, 0); - } - } + expressions::walk(filter.node.get(), updateNode); } const char* stringType(atype::type t) @@ -267,6 +250,7 @@ Operations createOperations(Filter const& expression) [lh = LiteralNodeHelper{}](LiteralNode const& node) { return lh(node); }, [bh = 
BindingNodeHelper{}](BindingNode const& node) { return bh(node); }, [ph = PlaceholderNodeHelper{}](PlaceholderNode const& node) { return ph(node); }, + [pr = ParameterNodeHelper{}](ParameterNode const& node) { return pr(node); }, [](auto&&) { return DatumSpec{}; }}, node->self); }; diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 8b08a9a38aa63..2296b5dcbfbc4 100644 --- a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -12,7 +12,6 @@ #include "Framework/Configurable.h" #include "Framework/ExpressionHelpers.h" #include "Framework/AnalysisDataModel.h" -#include "Framework/AODReaderHelpers.h" #include #include @@ -283,3 +282,29 @@ TEST_CASE("TestConditionalExpressions") auto gandiva_filter2 = createFilter(schema2, gandiva_condition2); REQUIRE(gandiva_tree2->ToString() == "bool greater_than((float) fSigned1Pt, (const float) 0 raw(0)) && if (bool less_than(float absf((float) fEta), (const float) 1 raw(3f800000)) && if (bool less_than((float) fPt, (const float) 1 raw(3f800000))) { bool greater_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) } else { bool less_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) }) { bool greater_than(float absf((float) fX), (const float) 1 raw(3f800000)) } else { bool greater_than(float absf((float) fY), (const float) 1 raw(3f800000)) }"); } + +TEST_CASE("TestBinnedExpressions") +{ + std::vector bins{0.5, 1.5, 2.5, 3.5, 4.5}; + std::vector params{1.0, 1.1, 1.2, 1.3, 2.0, 2.1, 2.2, 2.3, 3.0, 3.1, 3.2, 3.3, 4.0, 4.1, 4.2, 4.3}; + Projector p = binned(bins, params, o2::aod::track::pt, par(0) * o2::aod::track::x + par(1) * o2::aod::track::y + par(2) * o2::aod::track::z + par(3) * o2::aod::track::phi, LiteralNode{0.f}); + auto pspecs = createOperations(p); + auto schema = std::make_shared(std::vector{o2::aod::track::Pt::asArrowField(), o2::aod::track::X::asArrowField(), o2::aod::track::Y::asArrowField(), o2::aod::track::Z::asArrowField(), 
o2::aod::track::Phi::asArrowField()}); + auto tree = createExpressionTree(pspecs, schema); + REQUIRE(tree->ToString() == "if (bool less_than((float) fPt, (const float) 0.5 raw(3f000000))) { (const float) 0 raw(0) } else { if (bool less_than((float) fPt, (const float) 1.5 raw(3fc00000))) { float add(float add(float add(float multiply((const float) 1 raw(3f800000), (float) fX), float multiply((const float) 2 raw(40000000), (float) fY)), float multiply((const float) 3 raw(40400000), (float) fZ)), float multiply((const float) 4 raw(40800000), (float) fPhi)) } else { if (bool less_than((float) fPt, (const float) 2.5 raw(40200000))) { float add(float add(float add(float multiply((const float) 1.1 raw(3f8ccccd), (float) fX), float multiply((const float) 2.1 raw(40066666), (float) fY)), float multiply((const float) 3.1 raw(40466666), (float) fZ)), float multiply((const float) 4.1 raw(40833333), (float) fPhi)) } else { if (bool less_than((float) fPt, (const float) 3.5 raw(40600000))) { float add(float add(float add(float multiply((const float) 1.2 raw(3f99999a), (float) fX), float multiply((const float) 2.2 raw(400ccccd), (float) fY)), float multiply((const float) 3.2 raw(404ccccd), (float) fZ)), float multiply((const float) 4.2 raw(40866666), (float) fPhi)) } else { if (bool less_than((float) fPt, (const float) 4.5 raw(40900000))) { float add(float add(float add(float multiply((const float) 1.3 raw(3fa66666), (float) fX), float multiply((const float) 2.3 raw(40133333), (float) fY)), float multiply((const float) 3.3 raw(40533333), (float) fZ)), float multiply((const float) 4.3 raw(4089999a), (float) fPhi)) } else { (const float) 0 raw(0) } } } } }"); + + std::vector binning{0, o2::constants::math::PIHalf, o2::constants::math::PI, o2::constants::math::PI + o2::constants::math::PIHalf, o2::constants::math::TwoPI}; + std::vector parameters{1.0, 1.1, 1.2, 1.3, // par 0 + 2.0, 2.1, 2.2, 2.3, // par 1 + 3.0, 3.1, 3.2, 3.3, // par 2 + 4.0, 4.1, 4.2, 4.3}; // par 3 + + Projector p2 
= binned((std::vector)binning, + (std::vector)parameters, + o2::aod::track::phi, par(0) * o2::aod::track::x * o2::aod::track::x + par(1) * o2::aod::track::y * o2::aod::track::y + par(2) * o2::aod::track::z * o2::aod::track::z, + LiteralNode{-1.f}); + auto p2specs = createOperations(p2); + auto schema2 = std::make_shared(std::vector{o2::aod::track::Phi::asArrowField(), o2::aod::track::X::asArrowField(), o2::aod::track::Y::asArrowField(), o2::aod::track::Z::asArrowField()}); + auto tree2 = createExpressionTree(p2specs, schema2); + REQUIRE(tree2->ToString() == "if (bool less_than((float) fPhi, (const float) 0 raw(0))) { (const float) -1 raw(bf800000) } else { if (bool less_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb))) { float add(float add(float multiply(float multiply((const float) 1 raw(3f800000), (float) fX), (float) fX), float multiply(float multiply((const float) 2 raw(40000000), (float) fY), (float) fY)), float multiply(float multiply((const float) 3 raw(40400000), (float) fZ), (float) fZ)) } else { if (bool less_than((float) fPhi, (const float) 3.14159 raw(40490fdb))) { float add(float add(float multiply(float multiply((const float) 1.1 raw(3f8ccccd), (float) fX), (float) fX), float multiply(float multiply((const float) 2.1 raw(40066666), (float) fY), (float) fY)), float multiply(float multiply((const float) 3.1 raw(40466666), (float) fZ), (float) fZ)) } else { if (bool less_than((float) fPhi, (const float) 4.71239 raw(4096cbe4))) { float add(float add(float multiply(float multiply((const float) 1.2 raw(3f99999a), (float) fX), (float) fX), float multiply(float multiply((const float) 2.2 raw(400ccccd), (float) fY), (float) fY)), float multiply(float multiply((const float) 3.2 raw(404ccccd), (float) fZ), (float) fZ)) } else { if (bool less_than((float) fPhi, (const float) 6.28319 raw(40c90fdb))) { float add(float add(float multiply(float multiply((const float) 1.3 raw(3fa66666), (float) fX), (float) fX), float multiply(float multiply((const float) 2.3 
raw(40133333), (float) fY), (float) fY)), float multiply(float multiply((const float) 3.3 raw(40533333), (float) fZ), (float) fZ)) } else { (const float) -1 raw(bf800000) } } } } }"); +} From 64ef6b929885f2f422d22eb6d2d824a50b40e463 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 15 Apr 2025 14:10:51 +0200 Subject: [PATCH 0243/1764] Revert "GPU: Temporarily move some defines back to the wrapper, to be cleaned up in another PR" This reverts commit da00550e828dad4617bc4730797d154e4bf79858. --- .../Definitions/GPUDefParametersDefaults.h | 40 +++++++++++++++ .../Definitions/GPUDefParametersWrapper.h | 50 +------------------ 2 files changed, 41 insertions(+), 49 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index ce703e2ceba4a..83ef7d8cf810b 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -75,6 +75,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_VEGA) #define GPUCA_WARP_SIZE 64 #define GPUCA_THREAD_COUNT_DEFAULT 256 @@ -128,6 +138,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 512 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define 
GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -181,6 +201,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 384 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -226,6 +256,16 @@ #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half // #define GPUCA_USE_TEXTURES #elif defined(GPUCA_GPUTYPE_OPENCL) #else diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index 8d8815d8a8044..beeefa4eb5f9d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ 
b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,55 +22,7 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#ifdef GPUCA_GPUCODE -#if defined(GPUCA_GPUTYPE_MI2xx) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_VEGA) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_AMPERE) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_TURING) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half -#endif -#endif - -#ifdef GPUCA_GPUCODE +#if defined(GPUCA_GPUCODE) #include "GPUDefParametersDefaults.h" #endif #include "GPUDefParametersConstants.h" From efbf1474d8f2dcf42572bd7abdd7439580e8f7ba Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 15 Apr 2025 14:10:39 +0200 Subject: [PATCH 0244/1764] GPU: Move compile-time constant parameters to new scheme with runtimeParameter struct and automatic RTC-generated defines With this, we can revert the workaround to have the parameters as defines in the wrapper file --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 - GPU/GPUTracking/Base/GPUReconstruction.h | 3 +- .../Base/GPUReconstructionIncludes.h | 19 +- .../Base/GPUReconstructionProcessing.h | 1 + .../Base/cuda/GPUReconstructionCUDA.cu | 5 - .../Base/cuda/GPUReconstructionCUDA.h | 1 - .../Base/opencl/GPUReconstructionOCL.cxx | 5 - .../Base/opencl/GPUReconstructionOCL.h | 1 - GPU/GPUTracking/CMakeLists.txt | 9 +- .../DataCompression/GPUTPCCompression.cxx | 16 +- .../Definitions/GPUDefConstantsAndSettings.h | 2 +- .../Definitions/GPUDefParametersConstants.h | 45 +- .../Definitions/GPUDefParametersDefaults.h | 567 ++++++++++-------- .../GPUDefParametersLoad.template.inc | 35 +- .../GPUDefParametersRuntime.template.h | 4 + .../Definitions/GPUDefParametersWrapper.h | 6 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 8 +- .../Global/GPUChainTrackingCompression.cxx | 25 +- .../Global/GPUChainTrackingMerger.cxx | 10 +- .../Global/GPUChainTrackingSectorTracker.cxx | 5 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 10 +- .../SectorTracker/GPUTPCNeighboursFinder.cxx | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.h | 10 +- .../SectorTracker/GPUTPCStartHitsFinder.cxx | 4 +- .../SectorTracker/GPUTPCTracker.cxx | 6 +- .../SectorTracker/GPUTPCTrackletSelector.cxx | 18 +- 
.../SectorTracker/GPUTPCTrackletSelector.h | 8 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 19 + GPU/GPUTracking/dEdx/GPUdEdx.cxx | 4 +- GPU/GPUTracking/dEdx/GPUdEdx.h | 14 +- GPU/GPUTracking/kernels.cmake | 13 + 34 files changed, 466 insertions(+), 419 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 2f643706647ee..b4dac39ae1cd2 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -292,8 +292,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings.delayedOutput = false; } - UpdateAutomaticProcessingSettings(); - GPUCA_GPUReconstructionUpdateDefaults(); if (!mProcessingSettings.rtc.enable) { mProcessingSettings.rtc.optConstexpr = false; } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 18098396e1349..23fb6e4d9ff06 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -48,6 +48,7 @@ struct GPUReconstructionPipelineContext; struct GPUReconstructionThreading; class GPUROOTDumpCore; class ThrustVolatileAllocator; +struct GPUDefParameters; namespace gpu_reconstruction_kernels { @@ -205,6 +206,7 @@ class GPUReconstruction GPUOutputControl& OutputControl() { return mOutputControl; } uint32_t NStreams() const { return mNStreams; } const void* DeviceMemoryBase() const { return mDeviceMemoryBase; } + virtual const GPUDefParameters& getGPUParameters(bool doGPU) const = 0; RecoStepField GetRecoSteps() const { return mRecoSteps.steps; } RecoStepField GetRecoStepsGPU() const { return mRecoSteps.stepsGPUMask; } @@ -239,7 +241,6 @@ class GPUReconstruction void FreeRegisteredMemory(GPUMemoryResource* res); GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor int32_t InitPhaseBeforeDevice(); - virtual void UpdateAutomaticProcessingSettings() {} virtual int32_t InitDevice() = 0; int32_t InitPhasePermanentMemory(); 
int32_t InitPhaseAfterDevice(); diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index 6aba7e30a49d7..d3f11d86a731d 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -29,21 +29,4 @@ #include #include -#define GPUCA_GPUReconstructionUpdateDefaults() \ - if (mProcessingSettings.alternateBorderSort < 0) { \ - mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \ - } \ - if (mProcessingSettings.mergerSortTracks < 0) { \ - mProcessingSettings.mergerSortTracks = GPUCA_SORT_BEFORE_FIT; \ - } \ - if (param().rec.tpc.looperInterpolationInExtraPass < 0) { \ - param().rec.tpc.looperInterpolationInExtraPass = GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION; \ - } \ - if (mProcessingSettings.tpcCompressionGatherModeKernel < 0) { \ - mProcessingSettings.tpcCompressionGatherModeKernel = GPUCA_COMP_GATHER_KERNEL; \ - } \ - if (mProcessingSettings.tpcCompressionGatherMode < 0) { \ - mProcessingSettings.tpcCompressionGatherMode = GPUCA_COMP_GATHER_MODE; \ - } - -#endif +#endif // GPURECONSTRUCTIONINCLUDES_H diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 2428027118c0a..e8892c4be702b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -101,6 +101,7 @@ class GPUReconstructionProcessing : public GPUReconstruction uint32_t countToGPU = 0; uint32_t countToHost = 0; }; + const GPUDefParameters& getGPUParameters(bool doGPU) const override { return *(doGPU ? 
mParDevice : mParCPU); } protected: GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 47a9b675d27f6..3bea91994ba86 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -99,11 +99,6 @@ void GPUReconstructionCUDA::GetITSTraits(std::unique_ptr } } -void GPUReconstructionCUDA::UpdateAutomaticProcessingSettings() -{ - GPUCA_GPUReconstructionUpdateDefaults(); -} - int32_t GPUReconstructionCUDA::InitDevice_Runtime() { #ifndef __HIPCC__ // CUDA diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 3441c6b9a4fd6..b1a3a53a6a62f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -59,7 +59,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels GetThreadContext() override; void SynchronizeGPU() override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index af26bfc7aeca8..e276f83413bbc 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -60,11 +60,6 @@ int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, cons return error != CL_SUCCESS; } -void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() -{ - GPUCA_GPUReconstructionUpdateDefaults(); -} - int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() { if (mMaster == nullptr) { diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 16ef9b5e87fe8..abde42f01f073 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -37,7 +37,6 @@ class 
GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; - void UpdateAutomaticProcessingSettings() override; virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index f428d982394e0..eaeec508ff27a 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -247,14 +247,17 @@ foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST}) file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE}) list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME}) endforeach() +set(GPUDEFPARAMETERSLBLIST "$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,LB_>,\n>\n") +string(APPEND GPUDEFPARAMETERSLBLIST "$,PREPEND,PAR_>,\n>\n") +string(APPEND GPUDEFPARAMETERSLBLIST "$,PREPEND,PAR_>,\n>") file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase - CONTENT "$,REPLACE,[^A-Za-z0-9]+,_>,\n>" -) + CONTENT ${GPUDEFPARAMETERSLBLIST}) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h - COMMAND awk "{print(\"#ifndef GPUCA_LB_\" $0 \"\\n#define GPUCA_LB_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h + COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h COMMENT "Generating GPUDefParametersLoadPrepare.h" + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase VERBATIM COMMAND_EXPAND_LISTS ) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx 
b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 335b201d11d07..8a22545314252 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -16,6 +16,7 @@ #include "GPUReconstruction.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" +#include "GPUDefParametersRuntime.h" using namespace o2::gpu; @@ -36,11 +37,12 @@ void* GPUTPCCompression::SetPointersOutputHost(void* mem) void* GPUTPCCompression::SetPointersScratch(void* mem) { + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; computePointerWithAlignment(mem, mClusterStatus, mMaxClusters); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode >= 2) { + if (gatherMode >= 2) { computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTracks); } - if (mRec->GetProcessingSettings().tpcCompressionGatherMode != 1) { + if (gatherMode != 1) { SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false); } return mem; @@ -48,8 +50,9 @@ void* GPUTPCCompression::SetPointersScratch(void* mem) void* GPUTPCCompression::SetPointersOutput(void* mem) { + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? 
mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTrackClusters); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false); } return mem; @@ -102,12 +105,13 @@ void* GPUTPCCompression::SetPointersMemory(void* mem) void GPUTPCCompression::RegisterMemoryAllocation() { AllocateAndInitializeLate(); + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; mMemoryResOutputHost = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputHost, GPUMemoryResource::MEMORY_OUTPUT_FLAG | GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_CUSTOM, "TPCCompressionOutputHost"); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { mMemoryResOutputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputGPU, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_STACK, "TPCCompressionOutputGPU"); } - uint32_t stackScratch = (mRec->GetProcessingSettings().tpcCompressionGatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0; - if (mRec->GetProcessingSettings().tpcCompressionGatherMode < 2) { + uint32_t stackScratch = (gatherMode != 3) ? 
GPUMemoryResource::MEMORY_STACK : 0; + if (gatherMode < 2) { mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT | stackScratch, "TPCCompressionOutput"); } mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersScratch, GPUMemoryResource::MEMORY_SCRATCH | stackScratch, "TPCCompressionScratch"); diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 2d7aca8d71b92..48218dd7859e6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -13,7 +13,7 @@ /// \author David Rohr // This files contains compile-time constants affecting the GPU algorithms / reconstruction results. -// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefParameters.h +// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefParameters*.h #ifndef GPUDEFCONSTANTSANDSETTINGS_H #define GPUDEFCONSTANTSANDSETTINGS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h index 3a16d02ecf7c6..dd4a5dcbe7ba8 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h @@ -21,16 +21,12 @@ #define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! 
#if defined(__CUDACC__) || defined(__HIPCC__) - #define GPUCA_SPECIALIZE_THRUST_SORTS + #define GPUCA_SPECIALIZE_THRUST_SORTS // Not compiled with RTC, so must be compile-time constant #endif #define GPUCA_MAX_THREADS 1024 #define GPUCA_MAX_STREAMS 36 -#if defined(GPUCA_GPUCODE) - #define GPUCA_SORT_STARTHITS // Sort the start hits when running on GPU -#endif - #define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid #define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers #define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks @@ -44,44 +40,5 @@ #define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread #define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread -#ifdef GPUCA_GPUCODE - #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 - #endif - #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 - #endif - #ifndef GPUCA_ALTERNATE_BORDER_SORT - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #endif - #ifndef GPUCA_SORT_BEFORE_FIT - #define GPUCA_SORT_BEFORE_FIT 0 - #endif - #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #endif - #ifndef GPUCA_COMP_GATHER_KERNEL - #define GPUCA_COMP_GATHER_KERNEL 0 - #endif - #ifndef GPUCA_COMP_GATHER_MODE - #define GPUCA_COMP_GATHER_MODE 2 - #endif -#else - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #define GPUCA_SORT_BEFORE_FIT 0 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define GPUCA_THREAD_COUNT_FINDER 1 - #define GPUCA_COMP_GATHER_KERNEL 0 - #define GPUCA_COMP_GATHER_MODE 0 -#endif -#ifndef GPUCA_DEDX_STORAGE_TYPE - #define GPUCA_DEDX_STORAGE_TYPE float -#endif -#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float -#endif - // clang-format on #endif // 
GPUDEFPARAMETERSCONSTANTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 83ef7d8cf810b..b212abbcd2707 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -14,266 +14,265 @@ // This file contains compile-time constants affecting the GPU performance. -#if !defined(GPUDEFPARAMETERSDEFAULTS_H) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. +#if !defined(GPUDEFPARAMETERSDEFAULTS_H) #define GPUDEFPARAMETERSDEFAULTS_H // clang-format off // Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds) // GPU Run Configuration -#ifdef GPUCA_GPUCODE -#if defined(GPUCA_GPUTYPE_MI2xx) - #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024 - #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 - #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512 - #define 
GPUCA_LB_GPUTPCGMMergerResolve_step2 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 512 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #define GPUCA_LB_GPUTPCCFClusterizer 448 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_VEGA) - #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 128 - #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 256, 8 - #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 512 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define 
GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #define GPUCA_LB_GPUTPCCFClusterizer 512 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_AMPERE) - #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT_DEFAULT 512 - #define GPUCA_LB_GPUTPCCreateTrackingData 384 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 512 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4 - #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define 
GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448 - #define GPUCA_LB_GPUTPCCFPeakFinder 128 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 
448 - #define GPUCA_LB_GPUTPCCFDeconvolution 384 - #define GPUCA_LB_GPUTPCCFClusterizer 448 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_TURING) - #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT_DEFAULT 512 - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 512 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 - #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 - 
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - // #define GPUCA_USE_TEXTURES -#elif defined(GPUCA_GPUTYPE_OPENCL) -#else - #error GPU TYPE NOT SET -#endif -#endif // GPUCA_GPUCODE +#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
+ // GPU-architecture-dependent default settings + #if defined(GPUCA_GPUTYPE_MI2xx) + #define GPUCA_WARP_SIZE 64 + #define GPUCA_THREAD_COUNT_DEFAULT 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 + #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 1024 + #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 + #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 896 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 512 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define 
GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 + #define GPUCA_LB_GPUTPCCFPeakFinder 512 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 + #define GPUCA_LB_GPUTPCCFDeconvolution 512 + #define GPUCA_LB_GPUTPCCFClusterizer 448 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_VEGA) + #define GPUCA_WARP_SIZE 64 + #define GPUCA_THREAD_COUNT_DEFAULT 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 128 + #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 + #define GPUCA_LB_GPUTPCStartHitsFinder 1024 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 256, 8 + #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 896 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define 
GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 512 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 + #define GPUCA_LB_GPUTPCCFPeakFinder 512 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 + #define GPUCA_LB_GPUTPCCFDeconvolution 512 + #define GPUCA_LB_GPUTPCCFClusterizer 512 + #define 
GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_AMPERE) + #define GPUCA_WARP_SIZE 32 + #define GPUCA_THREAD_COUNT_DEFAULT 512 + #define GPUCA_LB_GPUTPCCreateTrackingData 384 + #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 512 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4 + #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 512 + #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 + #define 
GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448 + #define GPUCA_LB_GPUTPCCFPeakFinder 128 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 448 + #define GPUCA_LB_GPUTPCCFDeconvolution 384 + #define GPUCA_LB_GPUTPCCFClusterizer 448 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_TURING) + #define GPUCA_WARP_SIZE 32 + #define GPUCA_THREAD_COUNT_DEFAULT 512 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 + #define 
GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 512 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 + #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 512 + #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define 
GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + // #define GPUCA_USE_TEXTURES + #elif defined(GPUCA_GPUTYPE_OPENCL) + #else + #error GPU TYPE NOT SET + #endif -#ifdef GPUCA_GPUCODE // Default settings for GPU, if not already set for selected GPU type #ifndef GPUCA_WARP_SIZE #define GPUCA_WARP_SIZE 32 @@ -509,7 +508,67 @@ #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER -#endif + + // Defaults for non-LB parameters + #ifndef GPUCA_PAR_SORT_STARTHITS + #define GPUCA_PAR_SORT_STARTHITS 1 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 + #endif + #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 12 + #endif + #ifndef GPUCA_PAR_ALTERNATE_BORDER_SORT + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 0 + #endif + #ifndef GPUCA_PAR_SORT_BEFORE_FIT + #define GPUCA_PAR_SORT_BEFORE_FIT 0 + #endif + #ifndef GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_KERNEL + #define GPUCA_PAR_COMP_GATHER_KERNEL 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_MODE + #define GPUCA_PAR_COMP_GATHER_MODE 2 + #endif 
+#endif // defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) + +#ifndef GPUCA_GPUCODE_GENRTC + // Defaults (also for CPU) for non-LB parameters + #ifndef GPUCA_PAR_SORT_STARTHITS + #define GPUCA_PAR_SORT_STARTHITS 0 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 + #endif + #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 0 + #endif + #ifndef GPUCA_PAR_ALTERNATE_BORDER_SORT + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 0 + #endif + #ifndef GPUCA_PAR_SORT_BEFORE_FIT + #define GPUCA_PAR_SORT_BEFORE_FIT 0 + #endif + #ifndef GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_KERNEL + #define GPUCA_PAR_COMP_GATHER_KERNEL 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_MODE + #define GPUCA_PAR_COMP_GATHER_MODE 0 + #endif + #ifndef GPUCA_PAR_DEDX_STORAGE_TYPE + #define GPUCA_PAR_DEDX_STORAGE_TYPE float + #endif + #ifndef GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE float + #endif +#endif // GPUCA_GPUCODE_GENRTC // clang-format on #endif // GPUDEFPARAMETERSDEFAULTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index 938cedbdacc93..ac71adc6232a6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -32,29 +32,38 @@ static GPUDefParameters GPUDefParametersLoad() // clang-format off {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_LB_EMPTY0(GPUCA_M_FIRST(GPUCA_LB_>,APPEND,))>,$>}, {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0))>,$>}, - {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>} + 
{$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>}, + $,PREPEND,GPUCA_PAR_>,$>, + $,PREPEND,GPUCA_M_STR(GPUCA_PAR_>,APPEND,)>,$> // clang-format on }; } -#define GPUCA_EXPORT_KERNEL(name) \ - if (par.par_LB_maxThreads[i] > 0) { \ - o << "#define GPUCA_LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ - if (par.par_LB_minBlocks[i] > 0) { \ - o << ", " << par.par_LB_minBlocks[i]; \ - } \ - if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ - o << ", " << par.par_LB_forceBlocks[i]; \ - } \ - o << "\n"; \ - } \ +#define GPUCA_EXPORT_KERNEL_LB(name) \ + if (par.par_LB_maxThreads[i] > 0) { \ + o << "#define GPUCA_LB_" GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ + if (par.par_LB_minBlocks[i] > 0) { \ + o << ", " << par.par_LB_minBlocks[i]; \ + } \ + if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ + o << ", " << par.par_LB_forceBlocks[i]; \ + } \ + o << "\n"; \ + } \ i++; +#define GPUCA_EXPORT_KERNEL_PARAM(name) \ + o << "#define GPUCA_PAR_" GPUCA_M_STR(name) " " << GPUCA_M_CAT(par.par_, name) << "\n"; + static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC) { std::stringstream o; // clang-format off int32_t i = 0; - $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_EXPORT_KERNEL(>,APPEND,)>, + $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_EXPORT_KERNEL_LB(>,APPEND,)>, + > + $,PREPEND,GPUCA_EXPORT_KERNEL_PARAM(>,APPEND,)>, + > + $,PREPEND,GPUCA_EXPORT_KERNEL_PARAM(>,APPEND,)>, > return o.str(); // clang-format on } diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h index f3537c058a824..d023de7916676 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h @@ -21,6 +21,10 @@ struct GPUDefParameters { // clang-format off int32_t par_LB_maxThreads[$>] = {}; int32_t par_LB_minBlocks[$>] = {}; int32_t 
par_LB_forceBlocks[$>] = {}; + $,PREPEND,int32_t par_>,APPEND, = 0>,$ + >; + $,PREPEND,char par_>,APPEND,[128] = "">,$ + >; }; // clang-format on } // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index beeefa4eb5f9d..b2c08d689aeb2 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,9 +22,7 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#if defined(GPUCA_GPUCODE) #include "GPUDefParametersDefaults.h" -#endif #include "GPUDefParametersConstants.h" namespace o2::gpu @@ -38,8 +36,8 @@ namespace o2::gpu #define GPUCA_GET_WARP_COUNT(...) 1 // since launch bound constants are not defined in host-code, and must evaluate to 1! #endif -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) -#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) +#define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE) +#define GPUCA_PAR_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_PAR_DEDX_STORAGE_TYPE) // #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 6753db280d5bf..43fa49ff74817 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -40,6 +40,7 @@ #include "GPUTrackingInputProvider.h" #include "GPUNewCalibValues.h" #include "GPUTriggerOutputs.h" +#include "GPUDefParametersRuntime.h" #include "GPUTPCClusterStatistics.h" #include "GPUHostDataTypes.h" @@ -254,6 +255,7 @@ bool GPUChainTracking::ValidateSteps() bool GPUChainTracking::ValidateSettings() { + int32_t gatherMode = 
mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; if ((param().rec.tpc.nWays & 1) == 0) { GPUError("nWay setting musst be odd number!"); return false; @@ -270,7 +272,7 @@ bool GPUChainTracking::ValidateSettings() GPUError("NStreams of %d insufficient for %d nTPCClustererLanes", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); return false; } - if (GetProcessingSettings().noGPUMemoryRegistration && GetProcessingSettings().tpcCompressionGatherMode != 3) { + if (GetProcessingSettings().noGPUMemoryRegistration && gatherMode != 3) { GPUError("noGPUMemoryRegistration only possible with gather mode 3"); return false; } @@ -286,7 +288,7 @@ bool GPUChainTracking::ValidateSettings() GPUError("Must use external output for double pipeline mode"); return false; } - if (GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { GPUError("Double pipeline incompatible to compression mode 1"); return false; } @@ -295,7 +297,7 @@ bool GPUChainTracking::ValidateSettings() return false; } } - if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (GetProcessingSettings().tpcCompressionGatherMode == 1 || GetProcessingSettings().tpcCompressionGatherMode == 3)) { + if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (gatherMode == 1 || gatherMode == 3)) { GPUError("Invalid tpcCompressionGatherMode for compression on CPU"); return false; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 03d319f42fd6b..8fb6fc4771658 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -18,6 +18,7 @@ #include "GPUTrackingInputProvider.h" #include "GPUTPCCFChainContext.h" #include "TPCClusterDecompressor.h" +#include "GPUDefParametersRuntime.h" #include "utils/strtag.h" #include @@ -30,6 +31,7 @@ int32_t GPUChainTracking::RunTPCCompression() mRec->PushNonPersistentMemory(qStr2Tag("TPCCOMPR")); RecoStep myStep = RecoStep::TPCCompression; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCCompression; + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(doGPU).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; GPUTPCCompression& Compressor = processors()->tpcCompressor; GPUTPCCompression& CompressorShadow = doGPU ? processorsShadow()->tpcCompressor : Compressor; const auto& threadContext = GetThreadContext(); @@ -37,7 +39,7 @@ int32_t GPUChainTracking::RunTPCCompression() RecordMarker(&mEvents->single, 0); } - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile } SetupGPUProcessor(&Compressor, true); @@ -70,7 +72,7 @@ int32_t GPUChainTracking::RunTPCCompression() Compressor.mOutputFlat->set(outputSize, *Compressor.mOutput); char* hostFlatPtr = (char*)Compressor.mOutput->qTotU; // First array as allocated in GPUTPCCompression::SetPointersCompressedClusters size_t copySize = 0; - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { CompressorShadow.mOutputA = Compressor.mOutput; copySize = AllocateRegisteredMemory(Compressor.mMemoryResOutputGPU); // We overwrite Compressor.mOutput with the allocated output pointers on the GPU } @@ -81,8 +83,8 @@ int32_t GPUChainTracking::RunTPCCompression() SynchronizeStream(OutputStream()); // Synchronize output copies running in parallel from memory that might be released, only the following async copy from stacked memory is 
safe after the chain finishes. outputStream = OutputStream(); } - if (GetProcessingSettings().tpcCompressionGatherMode >= 2) { - if (GetProcessingSettings().tpcCompressionGatherMode == 2) { + if (gatherMode >= 2) { + if (gatherMode == 2) { void* devicePtr = mRec->getGPUPointer(Compressor.mOutputFlat); if (devicePtr != Compressor.mOutputFlat) { CompressedClustersPtrs& ptrs = *Compressor.mOutput; // We need to update the ptrs with the gpu-mapped version of the host address space @@ -94,7 +96,8 @@ int32_t GPUChainTracking::RunTPCCompression() TransferMemoryResourcesToGPU(myStep, &Compressor, outputStream); constexpr uint32_t nBlocksDefault = 2; constexpr uint32_t nBlocksMulti = 1 + 2 * 200; - switch (GetProcessingSettings().tpcCompressionGatherModeKernel) { + int32_t gatherModeKernel = mRec->GetProcessingSettings().tpcCompressionGatherModeKernel == -1 ? mRec->getGPUParameters(doGPU).par_COMP_GATHER_KERNEL : mRec->GetProcessingSettings().tpcCompressionGatherModeKernel; // -1: take the kernel from the GPU parameters, otherwise use the explicitly configured kernel + switch (gatherModeKernel) { case 0: runKernel(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression)); getKernelTimer(RecoStep::TPCCompression, 0, outputSize, false); @@ -117,10 +120,10 @@ int32_t GPUChainTracking::RunTPCCompression() getKernelTimer(RecoStep::TPCCompression, 0, outputSize, false); break; default: - GPUError("Invalid compression kernel %d selected.", (int32_t)GetProcessingSettings().tpcCompressionGatherModeKernel); + GPUError("Invalid compression kernel %d selected.", (int32_t)gatherModeKernel); return 1; } - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { RecordMarker(&mEvents->stream[outputStream], outputStream); char* deviceFlatPts = (char*)Compressor.mOutput->qTotU; if (GetProcessingSettings().doublePipeline) { @@ -135,9 +138,9 @@ int32_t GPUChainTracking::RunTPCCompression() } } else { int8_t direction = 0; - if (GetProcessingSettings().tpcCompressionGatherMode == 0) { + if (gatherMode == 0) { P = &CompressorShadow.mPtrs; - } else if
(GetProcessingSettings().tpcCompressionGatherMode == 1) { + } else if (gatherMode == 1) { P = &Compressor.mPtrs; direction = -1; gatherTimer = &getTimer("GPUTPCCompression_GatherOnCPU", 0); @@ -181,11 +184,11 @@ int32_t GPUChainTracking::RunTPCCompression() GPUMemCpyAlways(myStep, O->timeA, P->timeA, O->nTracks * sizeof(O->timeA[0]), outputStream, direction); GPUMemCpyAlways(myStep, O->padA, P->padA, O->nTracks * sizeof(O->padA[0]), outputStream, direction); } - if (GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { gatherTimer->Stop(); } mIOPtrs.tpcCompressedClusters = Compressor.mOutputFlat; - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { SynchronizeEventAndRelease(mEvents->stream[outputStream]); mRec->ReturnVolatileDeviceMemory(); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index ffab3ba0be063..a647c213660c9 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -14,6 +14,7 @@ #include "GPUChainTracking.h" #include "GPULogging.h" +#include "GPUDefParametersRuntime.h" #include "GPUO2DataTypes.h" #include "GPUQA.h" #include "utils/strtag.h" @@ -31,7 +32,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto runKernel({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0); } uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS; - if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) { + if (GetProcessingSettings().alternateBorderSort == -1 ? 
mRec->getGPUParameters(doGPU).par_ALTERNATE_BORDER_SORT : GetProcessingSettings().alternateBorderSort) { RecordMarker(&mEvents->single, 0); TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); for (uint32_t i = 0; i < n; i++) { @@ -176,7 +177,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) waitForTransfer = 1; } - if (GetProcessingSettings().mergerSortTracks) { + const bool mergerSortTracks = GetProcessingSettings().mergerSortTracks == -1 ? mRec->getGPUParameters(doGPU).par_SORT_BEFORE_FIT : GetProcessingSettings().mergerSortTracks; + if (mergerSortTracks) { runKernel(GetGridAuto(0, deviceType)); CondWaitEvent(waitForTransfer, &mEvents->single); runKernel(GetGridAuto(0, deviceType)); @@ -212,11 +214,11 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mOutputQueue.clear(); } - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), GetProcessingSettings().mergerSortTracks ? 1 : 0); + runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); if (param().rec.tpc.retryRefit == 1) { runKernel(GetGridAuto(0), -1); } - if (param().rec.tpc.looperInterpolationInExtraPass) { + if (param().rec.tpc.looperInterpolationInExtraPass == -1 ? 
mRec->getGPUParameters(doGPU).par_MERGER_SPLIT_LOOP_INTERPOLATION : param().rec.tpc.looperInterpolationInExtraPass) { runKernel(GetGridAuto(0)); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 3e7447892307a..64a9179baf0e6 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -19,6 +19,7 @@ #include "GPUTPCClusterData.h" #include "GPUTrackingInputProvider.h" #include "GPUTPCClusterOccupancyMap.h" +#include "GPUDefParametersRuntime.h" #include "utils/strtag.h" #include @@ -200,11 +201,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}}); -#ifdef GPUCA_SORT_STARTHITS_GPU - if (doGPU) { + if (mRec->getGPUParameters(doGPU).par_SORT_STARTHITS) { runKernel({GetGridAuto(useStream), {iSector}}); } -#endif if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGrid(1, 1, useStream), {iSector}}); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index bdf60f744b9ca..f42e5f35b1dc9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -37,6 +37,7 @@ #include "TPCFastTransform.h" #include "GPUTPCConvertImpl.h" #include "GPUTPCGeometry.h" +#include "GPUDefParametersRuntime.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" @@ -288,7 +289,8 @@ void* GPUTPCGMMerger::SetPointersMemory(void* mem) void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) { computePointerWithAlignment(mem, mTrackOrderAttach, mNMaxTracks); - if (mRec->GetProcessingSettings().mergerSortTracks) { + const bool mergerSortTracks = mRec->GetProcessingSettings().mergerSortTracks == -1 ? 
mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging).par_SORT_BEFORE_FIT : mRec->GetProcessingSettings().mergerSortTracks; + if (mergerSortTracks) { computePointerWithAlignment(mem, mTrackOrderProcess, mNMaxTracks); } return mem; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h index 238b04510862e..ba251ce34a3eb 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h @@ -32,7 +32,7 @@ enum attachTypes { attachAttached = 0x40000000, struct InterpolationErrorHit { float posY, posZ; - GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ; + GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ; }; struct InterpolationErrors { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index f1aac3da9a7a2..1617ac7b828af 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -663,7 +663,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, GPUCA_DEBUG_STREAMER_CHECK(if (debugVals) { debugVals->err2Y = err2Y; debugVals->err2Z = err2Z; }); if (rejectChi2 >= rejectInterFill) { - if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { + if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { rejectChi2 = rejectDirect; } else { int32_t retVal = InterpolateReject(param, posY, posZ, clusterState, rejectChi2, inter, err2Y, err2Z); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 260c64db052af..77453a87b3763 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -309,7 +309,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } else { int8_t rejectChi2 = 
attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? (GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); #if EXTRACT_RESIDUALS == 1 - if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { + if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { const float Iz0 = interpolation.hit[ihit].posY - mP[0]; const float Iz1 = interpolation.hit[ihit].posZ - mP[1]; float Iw0 = mC[2] + (float)interpolation.hit[ihit].errorZ; @@ -631,7 +631,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric for (uint32_t ih = hitFst; ih < hitLst; ih++) { int32_t id = idOffset + ids[ih]; GPUAtomic(uint32_t)* const weight = weights + id; -#if !defined(GPUCA_NO_ATOMIC_PRECHECK) && GPUCA_NO_ATOMIC_PRECHECK < 1 +#if GPUCA_NO_ATOMIC_PRECHECK == 0 if (myWeight <= *weight) { continue; } @@ -757,7 +757,8 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr if (Merger->Param().rec.tpc.disableRefitAttachment & 4) { return 1; } - if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { + const bool inExtraPass = Merger->Param().rec.tpc.looperInterpolationInExtraPass == -1 ? GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION : Merger->Param().rec.tpc.looperInterpolationInExtraPass; + if (inExtraPass && phase2 == false) { StoreAttachMirror(Merger, sector, iRow, iTrack, toAlpha, toY, toX, toSector, toRow, inFlyDirection, prop.GetAlpha()); return 1; } @@ -862,7 +863,8 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU if (Merger->Param().rec.tpc.disableRefitAttachment & 8) { return; } - if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { + const bool inExtraPass = Merger->Param().rec.tpc.looperInterpolationInExtraPass == -1 ? 
GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION : Merger->Param().rec.tpc.looperInterpolationInExtraPass; + if (inExtraPass && phase2 == false) { StoreAttachMirror(Merger, sector, iRow, iTrack, 0, toY, 0, -1, 0, 0, prop.GetAlpha()); return; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index ec348b59ce7a5..d76c079bb406f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -76,7 +76,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } #define UnrollGlobal 4 -#define MaxShared GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP +#define MaxShared GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP #if MaxShared < GPUCA_MAXN #define MaxGlobal ((GPUCA_MAXN - MaxShared - 1) / UnrollGlobal + 1) * UnrollGlobal #else diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 1bf5000cfbe5c..0ecd230a67415 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -40,11 +40,11 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate int32_t mIRow; // row number int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number -#if GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP); - float mA1[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; - float mA2[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; - calink mB[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; +#if GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 + static_assert(GPUCA_MAXN >= GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP); + float 
mA1[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; + float mA2[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; + calink mB[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; #endif GPUTPCRow mRow, mRowUp, mRowDown; }; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx index af79dddae554e..20dfd69864816 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx @@ -39,7 +39,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr uint32_t linkUpData = tracker.mData.mLinkUpData[lHitNumberOffset + ih]; if (tracker.mData.mLinkDownData[lHitNumberOffset + ih] == CALINK_INVAL && linkUpData != CALINK_INVAL && tracker.mData.mLinkUpData[rowUp.mHitNumberOffset + linkUpData] != CALINK_INVAL) { -#ifdef GPUCA_SORT_STARTHITS +#if GPUCA_PAR_SORT_STARTHITS > 0 GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; uint32_t nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); if (nextRowStartHits >= tracker.mNMaxRowStartHits) { @@ -61,7 +61,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr } GPUbarrier(); -#ifdef GPUCA_SORT_STARTHITS +#if GPUCA_PAR_SORT_STARTHITS > 0 if (iThread == 0) { uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 6c1b4eda0d7f5..e923e126e1841 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -22,6 +22,7 @@ #include "GPUTPCTrackParam.h" 
#include "GPUParam.inc" #include "GPUTPCConvertImpl.h" +#include "GPUDefParametersRuntime.h" #if !defined(GPUCA_GPUCODE) #include @@ -143,13 +144,12 @@ void GPUTPCTracker::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxRowHits = mRec->MemoryScalers()->NTPCTrackletHits(mData.NumberOfHits()); mNMaxTracks = mRec->MemoryScalers()->NTPCSectorTracks(mData.NumberOfHits()); mNMaxTrackHits = mRec->MemoryScalers()->NTPCSectorTrackHits(mData.NumberOfHits(), mRec->GetProcessingSettings().tpcInputWithClusterRejection); -#ifdef GPUCA_SORT_STARTHITS_GPU - if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) { + + if (mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking).par_SORT_STARTHITS) { if (mNMaxStartHits > mNMaxRowStartHits * GPUCA_ROW_COUNT) { mNMaxStartHits = mNMaxRowStartHits * GPUCA_ROW_COUNT; } } -#endif mData.SetMaxData(); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx index 8810b692e1377..e27a8f66ae754 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx @@ -33,7 +33,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread } GPUbarrier(); - GPUTPCHitId trackHits[GPUCA_ROW_COUNT - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE]; + GPUTPCHitId trackHits[GPUCA_ROW_COUNT - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; const float maxSharedFrac = tracker.Param().rec.tpc.trackletMaxSharedFraction; for (int32_t itr = s.mItr0 + iThread; itr < s.mNTracklets; itr += s.mNThreadsTotal) { @@ -67,13 +67,13 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); if (own || sharedOK) { // SG!!! 
gap = 0; -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (nHits < GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE) { +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + if (nHits < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { s.mHits[nHits][iThread].Set(irow, ih); } else -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 { - trackHits[nHits - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE].Set(irow, ih); + trackHits[nHits - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE].Set(irow, ih); } nHits++; if (!own) { @@ -101,13 +101,13 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread tracker.Tracks()[itrout].SetFirstHitID(nFirstTrackHit); tracker.Tracks()[itrout].SetNHits(nHits); for (int32_t jh = 0; jh < nHits; jh++) { -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (jh < GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE) { +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + if (jh < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { tracker.TrackHits()[nFirstTrackHit + jh] = s.mHits[jh][iThread]; } else -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 { - tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE]; + tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; } } } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index f487931bdaf4b..e5a28c80f37f9 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -36,10 +36,10 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate int32_t mNThreadsTotal; // total n threads int32_t mNTracklets; // n of tracklets int32_t mReserved; // for alignment reasons -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - static_assert(GPUCA_ROW_COUNT >= 
GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE); - GPUTPCHitId mHits[GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + static_assert(GPUCA_ROW_COUNT >= GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE); + GPUTPCHitId mHits[GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; typedef GPUconstantref() GPUTPCTracker processorType; diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 7faab410d20ea..35f2915d9486a 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -18,6 +18,8 @@ define_property(TARGET PROPERTY O2_GPU_KERNEL_NAMES) define_property(TARGET PROPERTY O2_GPU_KERNEL_INCLUDES) define_property(TARGET PROPERTY O2_GPU_KERNEL_FILES) define_property(TARGET PROPERTY O2_GPU_KERNEL_NO_FAST_MATH) +define_property(TARGET PROPERTY O2_GPU_KERNEL_PARAMS) +define_property(TARGET PROPERTY O2_GPU_KERNEL_STRING_PARAMS) set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/GPU/include_gpu_onthefly") file(MAKE_DIRECTORY ${O2_GPU_KERNEL_WRAPPER_FOLDER}) set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../") @@ -167,3 +169,20 @@ function(o2_gpu_kernel_set_deterministic) endif() endforeach() endfunction() + +function(o2_gpu_kernel_add_parameter) + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + message(STATUS "Adding ${ARGV${i}}") + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_PARAMS "${ARGV${i}}") + endforeach() +endfunction() +function(o2_gpu_kernel_add_string_parameter) + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + message(STATUS "Adding ${ARGV${i}}") + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_STRING_PARAMS "${ARGV${i}}") + 
endforeach() +endfunction() diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.cxx b/GPU/GPUTracking/dEdx/GPUdEdx.cxx index fd2aeda2828e3..340463b9ec7f7 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.cxx +++ b/GPU/GPUTracking/dEdx/GPUdEdx.cxx @@ -55,7 +55,7 @@ GPUd() void GPUdEdx::computedEdx(GPUdEdxInfo& GPUrestrict() output, const GPUPar output.NHitsSubThresholdOROC3 = countOROC3; } -GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) +GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_PAR_DEDX_STORAGE_TYPE_A* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) { trunclow = count * trunclow / 128; trunchigh = count * trunchigh / 128; @@ -65,7 +65,7 @@ GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* GPUrestrict() CAAlgo::sort(array, array + count); float mean = 0; for (int32_t i = trunclow; i < trunchigh; i++) { - mean += (float)array[i] * (1.f / scalingFactor::factor); + mean += (float)array[i] * (1.f / scalingFactor::factor); } return (mean / (trunchigh - trunclow)); } diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 4d3b652bdc5d1..e556fd3845d42 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -37,7 +37,7 @@ class GPUdEdx GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); private: - GPUd() float GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* array, int32_t count, int32_t trunclow, int32_t trunchigh); + GPUd() float GetSortTruncMean(GPUCA_PAR_DEDX_STORAGE_TYPE_A* array, int32_t count, int32_t trunclow, int32_t trunchigh); GPUd() void checkSubThresh(int32_t roc); template @@ -62,8 +62,8 @@ class GPUdEdx static constexpr int32_t MAX_NCL = GPUCA_ROW_COUNT; // Must fit in mNClsROC (uint8_t)! 
- GPUCA_DEDX_STORAGE_TYPE_A mChargeTot[MAX_NCL]; // No need for default, just some memory - GPUCA_DEDX_STORAGE_TYPE_A mChargeMax[MAX_NCL]; // No need for default, just some memory + GPUCA_PAR_DEDX_STORAGE_TYPE_A mChargeTot[MAX_NCL]; // No need for default, just some memory + GPUCA_PAR_DEDX_STORAGE_TYPE_A mChargeMax[MAX_NCL]; // No need for default, just some memory float mSubThreshMinTot = 0.f; float mSubThreshMinMax = 0.f; uint8_t mNClsROC[4] = {0}; @@ -78,8 +78,8 @@ GPUdi() void GPUdEdx::checkSubThresh(int32_t roc) if (roc != mLastROC) { if (mNSubThresh && mCount + mNSubThresh <= MAX_NCL) { for (int32_t i = 0; i < mNSubThresh; i++) { - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); } mNClsROC[mLastROC] += mNSubThresh; mNClsROCSubThresh[mLastROC] += mNSubThresh; @@ -151,8 +151,8 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qmax /= residualGainMapGain; qtot /= residualGainMapGain; - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(qtot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(qmax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(qtot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(qmax * scalingFactor::factor + scalingFactor::round); mNClsROC[roc]++; if (qtot < mSubThreshMinTot) { mSubThreshMinTot = qtot; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 994f10a516b10..ee3af2b87d925 100644 --- 
a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -134,3 +134,16 @@ o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFD o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) + +o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP + TRACKLET_SELECTOR_HITS_REG_SIZE + ALTERNATE_BORDER_SORT + SORT_BEFORE_FIT + MERGER_SPLIT_LOOP_INTERPOLATION + NO_ATOMIC_PRECHECK + COMP_GATHER_KERNEL + COMP_GATHER_MODE + SORT_STARTHITS) + +o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE + MERGER_INTERPOLATION_ERROR_TYPE) From 9cb8054883602562b7f62034bb468bb3ed707017 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:15:47 +0200 Subject: [PATCH 0245/1764] DPL GUI: do not sent any state when GUI is disabled (#14184) --- Framework/Core/src/CommonServices.cxx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index e13f1cb2094b7..5c333bbb85e3b 100644 --- a/Framework/Core/src/CommonServices.cxx +++ b/Framework/Core/src/CommonServices.cxx @@ -44,6 +44,7 @@ #include "Framework/DeviceConfig.h" #include "Framework/DefaultsHelpers.h" #include "Framework/Signpost.h" +#include "Framework/DriverConfig.h" #include "TextDriverClient.h" #include "WSDriverClient.h" @@ -800,6 +801,9 @@ auto sendRelayerMetrics(ServiceRegistryRef registry, DataProcessingStats& stats) auto flushStates(ServiceRegistryRef registry, DataProcessingStates& states) -> void { + if (!registry.get().driverHasGUI) { + return; + } states.flushChangedStates([&states, registry](std::string const& spec, int64_t timestamp, std::string_view value) mutable -> void { auto& client = registry.get(); client.push(spec, value, timestamp); From 39b766b44eb9a19c9f8dc4c39f880273ea1205f1 Mon Sep 17 
00:00:00 2001 From: Marco Giacalone Date: Mon, 14 Apr 2025 15:06:19 +0200 Subject: [PATCH 0246/1764] Updated EPOS4 example using different versions and HQ --- run/SimExamples/HepMC_EPOS4/README.md | 7 ++-- run/SimExamples/HepMC_EPOS4/epos.sh | 36 +++++++++++++++---- run/SimExamples/HepMC_EPOS4/example.optns | 3 +- run/SimExamples/HepMC_EPOS4/rundpg.sh | 42 ++++++++++++++++++++-- run/SimExamples/HepMC_EPOS4/rundpl.sh | 43 +++++++++++++++++++++-- run/SimExamples/HepMC_EPOS4/runo2sim.sh | 42 ++++++++++++++++++++-- 6 files changed, 156 insertions(+), 17 deletions(-) mode change 100644 => 100755 run/SimExamples/HepMC_EPOS4/README.md mode change 100644 => 100755 run/SimExamples/HepMC_EPOS4/example.optns mode change 100644 => 100755 run/SimExamples/HepMC_EPOS4/rundpg.sh mode change 100644 => 100755 run/SimExamples/HepMC_EPOS4/runo2sim.sh diff --git a/run/SimExamples/HepMC_EPOS4/README.md b/run/SimExamples/HepMC_EPOS4/README.md old mode 100644 new mode 100755 index 94c50572cff9f..8609501a6981a --- a/run/SimExamples/HepMC_EPOS4/README.md +++ b/run/SimExamples/HepMC_EPOS4/README.md @@ -7,10 +7,12 @@ An in-depth explanation of the mechanisms behind the HepMC(3) data handling can HepMC_fifo folder of the MC examples. The scripts use the `cmd` parameter of `GeneratorHepMC` to spawn the EPOS4 generation via the `epos.sh` script. -EPOS4 uses the outdated HepMC2 libraries, so this had to be specified in the steering scripts +EPOS 4.0.0 uses the outdated HepMC2 libraries, so this had to be specified in the steering scripts of the generators configuration. If `HepMC.version=2` is removed then the scripts will not work anymore. This is to say that the balance achieved with the configurations provided is easily destroyed if the user base edits parts that are not understood completely. +The latest EPOS 4.0.3 and EPOS4HQ both use HepMC3, so the version is automatically +updated when these generators are used. 
# Scripts description @@ -47,6 +49,7 @@ If no parameters are provided to the scripts, they will run with default values - **-n , --nevents** → changes the number of events in the .optns file or gets the one in the file if no events are provided - **-i , --input** → .optns filename to feed EPOS4, no extension must be set in the filename - **-j , --jobs** → sets the number of workers (jobs) +- **-hq** → enables EPOS4HQ generation - **-h , --help** → prints usage instructions - **-e , --ecm** → sets the center-of-mass energy in the options file @@ -62,6 +65,6 @@ Now the three scripts start to differ: - **rundpg.sh** → first the o2dpg_sim_workflow.py script will be launched generating the json configuration, then the o2_dpg_workflow_runner.py script will start the workflow - **rundpl.sh** → o2-sim-dpl-eventgen is executed piping its results to o2-sim-mctracks-to-aod and afterwards to o2-analysis-mctracks-to-aod-simple-task -The last few lines of the scripts contain the execution of o2-sim, DPG worflow creator/runner and DPL software respectively, so this part can be modified by the users following their requirements. It's important not to delete from the configuration keys `GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none;HepMC.version=2;` and it would be better to provide additional configurations via the -m flag. EPOS4 cannot set a maximum impact parameter value, so it's better to leave the bMaxSwitch to none, while the others serve the sole purpose of running successfully the generator using auto generated FIFOs. +The last few lines of the scripts contain the execution of o2-sim, DPG workflow creator/runner and DPL software respectively, so this part can be modified by the users following their requirements. It's important not to delete from the configuration keys `GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none$HEPMC;` and it would be better to provide additional configurations via the -m flag. 
EPOS4 cannot set a maximum impact parameter value, so it's better to leave the bMaxSwitch to none, while the others serve the sole purpose of running successfully the generator using auto generated FIFOs. diff --git a/run/SimExamples/HepMC_EPOS4/epos.sh b/run/SimExamples/HepMC_EPOS4/epos.sh index 46a7dbfa27e5c..a4185b756bf56 100755 --- a/run/SimExamples/HepMC_EPOS4/epos.sh +++ b/run/SimExamples/HepMC_EPOS4/epos.sh @@ -1,11 +1,23 @@ -#!/bin/sh +#!/bin/bash # Script based on CRMC example # EPOS4 option files must contain ihepmc set to 2 to print HepMC # data on stdout. -hepmc flag is not needed anymore, but -hepstd is fundamental # in order not to print useless information on stdout (a z-*optns*.mtr file will be created) optns="example" -seed=$RANDOM +seed=1 +EPOS4="" + +if [ -z "$EPO4VSN" ]; then + # Error: EPO4VSN environment variable is not set + exit 1 +fi + +if [ "$EPO4VSN" = "4.0.0" ]; then + EPOS4="$EPOS4_ROOT/epos4/scripts/epos" +else + EPOS4="$EPOS4_ROOT/bin/epos" +fi while test $# -gt 0 ; do case $1 in @@ -18,13 +30,23 @@ done if [ ! 
-f $optns.optns ]; then echo "Error: Options file $optns.optns not found" - exit 1 + exit 2 +fi + +if grep -Fq "set ihq 1" $optns.optns; then + if [ -z "$EPO4HQVSN" ]; then + # Error: EPOS4HQ version not found + exit 3 + else + # Running with EPOS4HQ + EPOS4="$EPO4HQ/bin/eposhq" + fi fi if [ $seed -eq 0 ]; then - echo "Seed can't be 0, random number will be used" - seed=$RANDOM + # Seed can't be 0, random number will be used + seed="$RANDOM" fi -# Or filters the stdout with only HepMC2 useful data -$EPOS4_ROOT/epos4/scripts/epos -hepstd -s $seed $optns | sed -n 's/^\(HepMC::\|[EAUWVP] \)/\1/p' +# OR filters the stdout with only HepMC useful data +$EPOS4 -hepstd -s $seed $optns | sed -n 's/^\(HepMC::\|[EAUWVP] \)/\1/p' diff --git a/run/SimExamples/HepMC_EPOS4/example.optns b/run/SimExamples/HepMC_EPOS4/example.optns old mode 100644 new mode 100755 index c2b067941e4e8..9df738d15fcff --- a/run/SimExamples/HepMC_EPOS4/example.optns +++ b/run/SimExamples/HepMC_EPOS4/example.optns @@ -29,4 +29,5 @@ set nfreeze 1 !number of freeze out events per hydro event set modsho 1 !printout every modsho events set centrality 0 !0=min bias set ihepmc 2 !HepMC output enabled on stdout -set nfull 10 +set nfull 10 !Total nEvents to be generated +set ihq 1 !Enable EPOS4HQ diff --git a/run/SimExamples/HepMC_EPOS4/rundpg.sh b/run/SimExamples/HepMC_EPOS4/rundpg.sh old mode 100644 new mode 100755 index 93993f66bfbd6..ea6d29ce0fa54 --- a/run/SimExamples/HepMC_EPOS4/rundpg.sh +++ b/run/SimExamples/HepMC_EPOS4/rundpg.sh @@ -23,6 +23,18 @@ optns="example" TF=1 eCM=-1 JOBS=2 +HEPMC="" +HQ=false + +if [ -z "$EPO4VSN" ]; then + echo "Error: EPOS4 version not found" + exit 7 +fi +if [ "$EPO4VSN" == "4.0.0" ]; then + HEPMC=";HepMC.version=2" +else + HEPMC=";HepMC.version=3" +fi usage() { @@ -38,6 +50,7 @@ Options: -h,--help Print these instructions -e,--ecm ENERGY Center-of-Mass energy -t,--tf TF Timeframes ($TF) + -hq HQ Enable EPOS4HQ -- Rest of command line sent to o2-sim COMMAND must be quoted if 
it contains spaces or other special @@ -59,6 +72,7 @@ while test $# -gt 0 ; do -i|--input) optns=$2 ; shift ;; -j|--jobs) JOBS=$2 ; shift ;; -e|--ecm) eCM=$2 ; shift ;; + -hq) HQ=true ; shift ;; -h|--help) usage; ${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow.py --help ; exit 0 ;; -t|--tf) TF=$2 ; shift ;; --) shift ; break ;; @@ -115,6 +129,30 @@ else fi fi +# Set HQ mode + +if [ "$HQ" = true ]; then + echo "Setting HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 1" $optns.optns + else + echo "set ihq 1" >> $optns.optns + fi + if [ -z "$EPO4HQVSN" ]; then + echo "Error: EPOS4HQ version not found" + exit 7 + else + HEPMC=";HepMC.version=3" + fi +else + echo "Turning OFF HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 0" $optns.optns + else + echo "set ihq 0" >> $optns.optns + fi +fi + # Copy options file in each timeframe folder for i in $(seq 1 $TF); do if [ ! -d tf$i ]; then @@ -125,8 +163,8 @@ done # create workflow -${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow.py -eCM $eCM -ns $NEV -gen hepmc -tf $TF -j $JOBS \ - -interactionRate 500000 -confKey "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none;HepMC.version=2;${more}" +${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow.py -eCM $eCM -ns $NEV -gen hepmc -tf $TF -j $JOBS -seed $RANDOM \ + -interactionRate 500000 -confKey "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none$HEPMC;${more}" # Run workflow ${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt aod --stdout-on-failure diff --git a/run/SimExamples/HepMC_EPOS4/rundpl.sh b/run/SimExamples/HepMC_EPOS4/rundpl.sh index c3851175d08f4..919eedf1a2340 100755 --- a/run/SimExamples/HepMC_EPOS4/rundpl.sh +++ b/run/SimExamples/HepMC_EPOS4/rundpl.sh @@ -21,6 +21,18 @@ more="" optns="example" eCM=-1 JOBS=2 +HEPMC="" +HQ=false + +if [ -z "$EPO4VSN" ]; then + echo "Error: EPOS4 version not found" + exit 7 +fi +if [ "$EPO4VSN" == "4.0.0" ]; then + HEPMC=";HepMC.version=2" +else + 
HEPMC=";HepMC.version=3" +fi usage() { @@ -35,6 +47,7 @@ Options: -j,--jobs JOBS Number of jobs ($JOBS) -e,--ecm ENERGY Center-of-Mass energy -h,--help Print these instructions + -hq HQ Enable EPOS4HQ -- Rest of command line sent to o2-sim COMMAND must be quoted if it contains spaces or other special @@ -56,6 +69,7 @@ while test $# -gt 0 ; do -i|--input) optns=$2 ; shift ;; -j|--jobs) JOBS=$2 ; shift ;; -e|--ecm) eCM=$2 ; shift ;; + -hq) HQ=true ; shift ;; -h|--help) usage; o2-sim-dpl-eventgen --help full ; exit 0 ;; --) shift ; break ;; *) echo "Unknown option '$1', did you forget '--'?" >/dev/stderr @@ -111,9 +125,32 @@ else fi fi -# Starting simulation => seed is fed automatically to epos with the --seed flag. HepMC.version = 2 is mandatory +# Set HQ mode + +if [ "$HQ" = true ]; then + echo "Setting HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 1" $optns.optns + else + echo "set ihq 1" >> $optns.optns + fi + if [ -z "$EPO4HQVSN" ]; then + echo "Error: EPOS4HQ version not found" + exit 7 + else + HEPMC=";HepMC.version=3" + fi +else + echo "Turning OFF HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 0" $optns.optns + else + echo "set ihq 0" >> $optns.optns + fi +fi + +# Starting simulation => seed is fed automatically to epos with the --seed flag. HepMC.version = 2 is mandatory for version 4.0.0 # otherwise the simulation won't work. 
# Seed is automatically set to Random by the epos.sh script because the --seed option with o2-sim-dpl-eventgen does not feed the number to GeneratorHepMC - -o2-sim-dpl-eventgen -b --nEvents ${NEV} --generator hepmc --configKeyValues "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none;HepMC.version=2;${more}" |\ +o2-sim-dpl-eventgen -b --nEvents ${NEV} --generator hepmc --configKeyValues "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none$HEPMC;${more}" |\ o2-sim-mctracks-to-aod -b | o2-analysis-mctracks-to-aod-simple-task -b diff --git a/run/SimExamples/HepMC_EPOS4/runo2sim.sh b/run/SimExamples/HepMC_EPOS4/runo2sim.sh old mode 100644 new mode 100755 index 31698f39a87f0..a241f9affba19 --- a/run/SimExamples/HepMC_EPOS4/runo2sim.sh +++ b/run/SimExamples/HepMC_EPOS4/runo2sim.sh @@ -21,6 +21,18 @@ more="" optns="example" eCM=-1 JOBS=2 +HEPMC="" +HQ=false + +if [ -z "$EPO4VSN" ]; then + echo "Error: EPOS4 version not found" + exit 7 +fi +if [ "$EPO4VSN" == "4.0.0" ]; then + HEPMC=";HepMC.version=2" +else + HEPMC=";HepMC.version=3" +fi usage() { @@ -35,6 +47,7 @@ Options: -j,--jobs JOBS Number of jobs ($JOBS) -e,--ecm ENERGY Center-of-Mass energy -h,--help Print these instructions + -hq HQ Enable EPOS4HQ -- Rest of command line sent to o2-sim COMMAND must be quoted if it contains spaces or other special @@ -56,6 +69,7 @@ while test $# -gt 0 ; do -i|--input) optns=$2 ; shift ;; -j|--jobs) JOBS=$2 ; shift ;; -e|--ecm) eCM=$2 ; shift ;; + -hq) HQ=true ; shift ;; -h|--help) usage; o2-sim --help full ; exit 0 ;; --) shift ; break ;; *) echo "Unknown option '$1', did you forget '--'?" >/dev/stderr @@ -111,7 +125,31 @@ else fi fi -# Starting simulation => seed is fed automatically to epos with the --seed flag. 
HepMC.version = 2 is mandatory +# Set HQ mode + +if [ "$HQ" = true ]; then + echo "Setting HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 1" $optns.optns + else + echo "set ihq 1" >> $optns.optns + fi + if [ -z "$EPO4HQVSN" ]; then + echo "Error: EPOS4HQ version not found" + exit 7 + else + HEPMC=";HepMC.version=3" + fi +else + echo "Turning OFF HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 0" $optns.optns + else + echo "set ihq 0" >> $optns.optns + fi +fi + +# Starting simulation => seed is fed automatically to epos with the --seed flag. HepMC.version = 2 is mandatory for version 4.0.0 # otherwise the simulation won't work o2-sim -j $JOBS -n ${NEV} -g hepmc --seed $RANDOM \ - --configKeyValues "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none;HepMC.version=2;${more}" + --configKeyValues "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none$HEPMC;${more}" From 455f7df21328b0d74a86f36a4f92811f4bc09726 Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Wed, 16 Apr 2025 16:31:21 +0200 Subject: [PATCH 0247/1764] Include pO and OO example configurations for Pythia8 --- Generators/share/egconfig/pythia8_OO.cfg | 8 ++++++++ Generators/share/egconfig/pythia8_pO.cfg | 8 ++++++++ 2 files changed, 16 insertions(+) create mode 100644 Generators/share/egconfig/pythia8_OO.cfg create mode 100644 Generators/share/egconfig/pythia8_pO.cfg diff --git a/Generators/share/egconfig/pythia8_OO.cfg b/Generators/share/egconfig/pythia8_OO.cfg new file mode 100644 index 0000000000000..ff098e6b65135 --- /dev/null +++ b/Generators/share/egconfig/pythia8_OO.cfg @@ -0,0 +1,8 @@ +### beams +Beams:idA 1000080160 # Oxygen +Beams:idB 1000080160 # Oxygen +Beams:eCM 10720. # GeV + +### decays +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. 
diff --git a/Generators/share/egconfig/pythia8_pO.cfg b/Generators/share/egconfig/pythia8_pO.cfg new file mode 100644 index 0000000000000..aff9d3337cd9d --- /dev/null +++ b/Generators/share/egconfig/pythia8_pO.cfg @@ -0,0 +1,8 @@ +### beams +Beams:idA 2212 # proton +Beams:idB 1000080160 # Oxygen +Beams:eCM 13600. # GeV + +### decays +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. From 10cd81636c902b15283a504eae413445e97e6d84 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 16 Apr 2025 15:15:27 +0200 Subject: [PATCH 0248/1764] GPU TPC: When running cluster rejection based on interpolation, also reject during update with current cluster position --- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 34 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 5 ++- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 1617ac7b828af..9e23f9af3cf43 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -691,7 +691,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, return 0; } - return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect, err2Y, err2Z, ¶m); + return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || rejectChi2 == rejectInterReject, err2Y, err2Z, ¶m); } GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict() param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z) @@ -704,7 +704,7 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict inter->errorY = mC[0]; inter->errorZ = mC[2]; } else if (rejectChi2 == rejectInterReject) { - float chiY, chiZ; + float chi2Y, chi2Z; if (mFitInProjections || mT->NDF() <= 0) { const float 
Iz0 = inter->posY - mP[0]; const float Iz1 = inter->posZ - mP[1]; @@ -721,8 +721,8 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict const float Jz1 = posZ - ImP1; const float Jw0 = 1.f / (ImC0 + err2Y); const float Jw2 = 1.f / (ImC2 + err2Z); - chiY = Jw0 * Jz0 * Jz0; - chiZ = Jw2 * Jz1 * Jz1; + chi2Y = Jw0 * Jz0 * Jz0; + chi2Z = Jw2 * Jz1 * Jz1; } else { const float Iz0 = inter->posY - mP[0]; const float Iz1 = inter->posZ - mP[1]; @@ -751,11 +751,11 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict Jw0 *= Jdet; const float Jw1 = ImC1 * Jdet; Jw2 *= Jdet; - chiY = CAMath::Abs((Jw0 * Jz0 + Jw1 * Jz1) * Jz0); - chiZ = CAMath::Abs((Jw1 * Jz0 + Jw2 * Jz1) * Jz1); + chi2Y = CAMath::Abs((Jw0 * Jz0 + Jw1 * Jz1) * Jz0); + chi2Z = CAMath::Abs((Jw1 * Jz0 + Jw2 * Jz1) * Jz1); } - if (RejectCluster(chiY * param.rec.tpc.clusterRejectChi2TolleranceY, chiZ * param.rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { // TODO: Relative Pt resolution decreases slightly, why? - return updateErrorClusterRejected; + if (RejectCluster(chi2Y * param.rec.tpc.clusterRejectChi2TolleranceY, chi2Z * param.rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { // TODO: Relative Pt resolution decreases slightly, why? 
+ return updateErrorClusterRejectedInInterpolation; } } return 0; @@ -771,13 +771,13 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int16_t cluste const float z0 = posY - mP[0]; const float z1 = posZ - mP[1]; - float w0, w1, w2, chiY, chiZ; + float w0, w1, w2, chi2Y, chi2Z; if (mFitInProjections || mT->NDF() <= 0) { w0 = 1.f / (err2Y + d00); w1 = 0; w2 = 1.f / (err2Z + d11); - chiY = w0 * z0 * z0; - chiZ = w2 * z1 * z1; + chi2Y = w0 * z0 * z0; + chi2Z = w2 * z1 * z1; } else { w0 = d11 + err2Z, w1 = d10, w2 = d00 + err2Y; { // Invert symmetric matrix @@ -790,13 +790,13 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int16_t cluste w1 = -w1 * det; w2 = w2 * det; } - chiY = CAMath::Abs((w0 * z0 + w1 * z1) * z0); - chiZ = CAMath::Abs((w1 * z0 + w2 * z1) * z1); + chi2Y = CAMath::Abs((w0 * z0 + w1 * z1) * z0); + chi2Z = CAMath::Abs((w1 * z0 + w2 * z1) * z1); } - float dChi2 = chiY + chiZ; - // GPUInfo("hits %d chi2 %f, new %f %f (dy %f dz %f)", N, mChi2, chiY, chiZ, z0, z1); - if (rejectChi2 == 1 && RejectCluster(chiY * param->rec.tpc.clusterRejectChi2TolleranceY, chiZ * param->rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { - return updateErrorClusterRejected; + float dChi2 = chi2Y + chi2Z; + // GPUInfo("hits %d chi2 %f, new %f %f (dy %f dz %f)", N, mChi2, chi2Y, chi2Z, z0, z1); + if (rejectChi2 && RejectCluster(chi2Y * param->rec.tpc.clusterRejectChi2TolleranceY, chi2Z * param->rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { + return updateErrorClusterRejectedInUpdate; } mT->Chi2() += dChi2; mT->NDF() += 2; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index a2369bafc9751..d2d06df7b5710 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -52,7 +52,10 @@ class GPUTPCGMPropagator enum UpdateRetVal { updateErrorFitFailed = -1, updateErrorClusterRejected = 2, - updateErrorEdgeCluster = 3 + 
updateErrorClusterRejectedDistance = 2, + updateErrorEdgeCluster = 3, + updateErrorClusterRejectedInInterpolation = 4, + updateErrorClusterRejectedInUpdate = 5 }; enum RejectChi2Mode { rejectDirect = 1, diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 77453a87b3763..3b50bec45a41e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -305,7 +305,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ int32_t retVal; float threshold = 3.f + (lastUpdateX >= 0 ? (CAMath::Abs(mX - lastUpdateX) / 2) : 0.f); if (mNDF > 5 && (CAMath::Abs(yy - mP[0]) > threshold || CAMath::Abs(zz - mP[1]) > threshold)) { - retVal = GPUTPCGMPropagator::updateErrorClusterRejected; + retVal = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; } else { int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? 
(GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); #if EXTRACT_RESIDUALS == 1 From 84714d9838b3a07f4ed88903e6628071e322b549 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 16 Apr 2025 18:37:13 +0200 Subject: [PATCH 0249/1764] GPU: Fix track buffer size for 0 magnetic field --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f42e5f35b1dc9..bf8d5294bb7c4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -378,8 +378,8 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) } } mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * gpu_common_constants::kCLight)) { - mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.013f * gpu_common_constants::kCLight)) { + mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field } else { mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks); } From 63bc2e3893851ef0f849bb4c98c65eae1ba21e47 Mon Sep 17 00:00:00 2001 From: noferini <9963644+noferini@users.noreply.github.com> Date: Sat, 4 Jan 2025 11:17:58 +0100 Subject: [PATCH 0250/1764] fix in FT0 digitization (time wrt BC) --- Detectors/FIT/FT0/simulation/src/Digitizer.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/FIT/FT0/simulation/src/Digitizer.cxx b/Detectors/FIT/FT0/simulation/src/Digitizer.cxx index a261475df31f5..aca012f1bc5a9 100644 --- a/Detectors/FIT/FT0/simulation/src/Digitizer.cxx +++ b/Detectors/FIT/FT0/simulation/src/Digitizer.cxx @@ -220,7 +220,7 @@ void Digitizer::process(const 
std::vector* hits, // Subtract time-of-flight from hit time const Float_t timeOfFlight = hit.GetPos().R() / o2::constants::physics::LightSpeedCm2NS; const Float_t timeOffset = is_A_side ? params.hitTimeOffsetA : params.hitTimeOffsetC; - Double_t hit_time = hit.GetTime() - timeOfFlight + timeOffset; + Double_t hit_time = hit.GetTime() - timeOfFlight + timeOffset + mIntRecord.getTimeOffsetWrtBC(); if (hit_time > 150) { continue; // not collect very slow particles @@ -285,7 +285,7 @@ void Digitizer::storeBC(BCCache& bc, if (mCalibOffset) { miscalib = mCalibOffset->mTimeOffsets[ipmt]; } - int smeared_time = 1000. * (*cfd.particle - params.mCfdShift) * params.mChannelWidthInverse + miscalib + int(1000. * mIntRecord.getTimeOffsetWrtBC() * params.mChannelWidthInverse); + int smeared_time = 1000. * (*cfd.particle - params.mCfdShift) * params.mChannelWidthInverse + miscalib; // + int(1000. * mIntRecord.getTimeOffsetWrtBC() * params.mChannelWidthInverse); bool is_time_in_signal_gate = (smeared_time > -params.mTime_trg_gate && smeared_time < params.mTime_trg_gate); float charge = measure_amplitude(channel_times) * params.mCharge2amp; float amp = is_time_in_signal_gate ? 
params.mMV_2_Nchannels * charge : 0; From d98f5354db54f8410925a3578a8d7cead2079fae Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 17 Apr 2025 01:55:38 +0200 Subject: [PATCH 0251/1764] Fix margin for 0 B-field in the GPU code --- GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx index 7793fac7e03ac..cf35a7f261167 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx @@ -195,7 +195,7 @@ int32_t GPUTPCGMPolynomialFieldManager::GetPolynomialField(float nominalFieldkG, StoredField_t type = kUnknown; - if (fabsf(nominalFieldkG) < 0.01f) { + if (fabsf(nominalFieldkG) < 0.013f) { type = kUniform; nominalFieldkG = 0; } else if (fabsf(fabsf(nominalFieldkG) - 5.00668f) <= fabsf(fabsf(nominalFieldkG) - 2.f)) { From 9a197c20a013b43f11c1605214307306a78a9c1a Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 16 Apr 2025 10:06:28 +0200 Subject: [PATCH 0252/1764] Use common FatalWhenNull setting in getSpecificForRun --- CCDB/include/CCDB/BasicCCDBManager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CCDB/include/CCDB/BasicCCDBManager.h b/CCDB/include/CCDB/BasicCCDBManager.h index 8af1817718fa2..b7bf6920a5c7c 100644 --- a/CCDB/include/CCDB/BasicCCDBManager.h +++ b/CCDB/include/CCDB/BasicCCDBManager.h @@ -330,7 +330,7 @@ T* CCDBManagerInstance::getForRun(std::string const& path, int runNumber, bool s template T* CCDBManagerInstance::getSpecificForRun(std::string const& path, int runNumber, MD metaData) { - auto [start, stop] = getRunDuration(runNumber); + auto [start, stop] = getRunDuration(runNumber, mFatalWhenNull); if (start < 0 || stop < 0) { if (mFatalWhenNull) { reportFatal(std::string("Failed to get run duration for run ") + std::to_string(runNumber) + std::string(" from CCDB")); 
From 919e8f2c8b81d80d3d249e289dcdea657b4377ae Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Mon, 14 Apr 2025 11:02:43 +0200 Subject: [PATCH 0253/1764] DPL: set runNumber in DataHeader from raw data readers --- Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx | 7 ++++++- Detectors/Raw/src/RawFileReaderWorkflow.cxx | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx index ef59d94b26048..07a62a7fd4a58 100644 --- a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx +++ b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx @@ -327,7 +327,12 @@ void TFReaderSpec::stopProcessing(o2f::ProcessingContext& ctx) auto device = ctx.services().get().device(); o2f::SourceInfoHeader exitHdr; exitHdr.state = o2f::InputChannelState::Completed; - const auto exitStack = o2h::Stack(o2h::DataHeader(o2h::gDataDescriptionInfo, o2h::gDataOriginAny, 0, 0), o2f::DataProcessingHeader(), exitHdr); + o2h::DataHeader dh = o2h::DataHeader(o2h::gDataDescriptionInfo, o2h::gDataOriginAny, 0, 0); + try { + dh.runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) 
{ + } + const auto exitStack = o2h::Stack(dh, o2f::DataProcessingHeader(), exitHdr); auto fmqFactory = device->GetChannel(mInput.rawChannelConfig, 0).Transport(); auto hdEOSMessage = fmqFactory->CreateMessage(exitStack.size(), fair::mq::Alignment{64}); auto plEOSMessage = fmqFactory->CreateMessage(0, fair::mq::Alignment{64}); diff --git a/Detectors/Raw/src/RawFileReaderWorkflow.cxx b/Detectors/Raw/src/RawFileReaderWorkflow.cxx index 46f7ba2a5c7f1..a7313f3154ac2 100644 --- a/Detectors/Raw/src/RawFileReaderWorkflow.cxx +++ b/Detectors/Raw/src/RawFileReaderWorkflow.cxx @@ -347,7 +347,12 @@ void RawReaderSpecs::run(o2f::ProcessingContext& ctx) if (!mRawChannelName.empty()) { // send endOfStream message to raw channel o2f::SourceInfoHeader exitHdr; exitHdr.state = o2f::InputChannelState::Completed; - const auto exitStack = o2::header::Stack(o2h::DataHeader(o2h::gDataDescriptionInfo, o2h::gDataOriginAny, 0, 0), o2f::DataProcessingHeader(), exitHdr); + o2h::DataHeader dh = o2h::DataHeader(o2h::gDataDescriptionInfo, o2h::gDataOriginAny, 0, 0); + try { + dh.runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) 
{ + } + const auto exitStack = o2::header::Stack(dh, o2f::DataProcessingHeader(), exitHdr); auto fmqFactory = device->GetChannel(mRawChannelName, 0).Transport(); auto hdEOSMessage = fmqFactory->CreateMessage(exitStack.size(), fair::mq::Alignment{64}); auto plEOSMessage = fmqFactory->CreateMessage(0, fair::mq::Alignment{64}); From a7246d0f2f8c01fd893483fe83e902a1ff5c7e47 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 17 Apr 2025 09:33:09 +0200 Subject: [PATCH 0254/1764] GPU: Use a unified constant instead of copy and paste for 0 field cut --- GPU/Common/GPUCommonConstants.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 ++-- GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/GPU/Common/GPUCommonConstants.h b/GPU/Common/GPUCommonConstants.h index 01d67eab1f9d3..1a7e34885c34a 100644 --- a/GPU/Common/GPUCommonConstants.h +++ b/GPU/Common/GPUCommonConstants.h @@ -20,6 +20,7 @@ namespace o2::gpu::gpu_common_constants { static constexpr const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this now that we use only OpenCL CPP +static constexpr const float kZeroFieldCut = 0.013f; } #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index bf8d5294bb7c4..82b21e2045b8e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -378,7 +378,7 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) } } mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.013f * gpu_common_constants::kCLight)) { + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field } else { mNMaxTracks = 
mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks); @@ -1743,7 +1743,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.DzDs() = p2.DzDs(); p1.QPt() = p2.QPt(); mergedTrack.SetAlpha(p2.Alpha()); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.013f * gpu_common_constants::kCLight)) { + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx index cf35a7f261167..6717ac775b077 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx @@ -195,7 +195,7 @@ int32_t GPUTPCGMPolynomialFieldManager::GetPolynomialField(float nominalFieldkG, StoredField_t type = kUnknown; - if (fabsf(nominalFieldkG) < 0.013f) { + if (fabsf(nominalFieldkG) < gpu_common_constants::kZeroFieldCut) { type = kUniform; nominalFieldkG = 0; } else if (fabsf(fabsf(nominalFieldkG) - 5.00668f) <= fabsf(fabsf(nominalFieldkG) - 2.f)) { From b45085c120b07fd313e9d0d546705a0f9c0f7c3e Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Thu, 17 Apr 2025 11:04:40 +0200 Subject: [PATCH 0255/1764] Include 20Neon in Pythia8 particles database --- Generators/share/egconfig/pythia8_NeNe.cfg | 8 ++++++++ Generators/src/GeneratorPythia8.cxx | 2 ++ 2 files changed, 10 insertions(+) create mode 100644 Generators/share/egconfig/pythia8_NeNe.cfg diff --git a/Generators/share/egconfig/pythia8_NeNe.cfg b/Generators/share/egconfig/pythia8_NeNe.cfg new file mode 100644 index 0000000000000..fff1dbb5f3d59 --- /dev/null +++ b/Generators/share/egconfig/pythia8_NeNe.cfg @@ -0,0 +1,8 @@ +### beams +Beams:idA 1000100200 # Neon +Beams:idB 1000100200 # Neon +Beams:eCM 10720. # GeV + +### decays +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. 
diff --git a/Generators/src/GeneratorPythia8.cxx b/Generators/src/GeneratorPythia8.cxx index 385eb148412ef..e883ceb6470a9 100644 --- a/Generators/src/GeneratorPythia8.cxx +++ b/Generators/src/GeneratorPythia8.cxx @@ -209,6 +209,8 @@ Bool_t GeneratorPythia8::Init() mPythia.setUserHooksPtr((Pythia8::UserHooksPtr)powhegHooks); } } + /** Add 20Neon to collision particle database */ + mPythia.particleData.addParticle(1000100200, "20Ne", 6, 30, 0, 19.992440); /** initialise **/ if (!mPythia.init()) { LOG(fatal) << "Failed to init \'Pythia8\': init returned with error"; From 99c08d385ed02f7dfb2f8e2032f6a2882bd24171 Mon Sep 17 00:00:00 2001 From: pillot Date: Thu, 17 Apr 2025 17:08:29 +0200 Subject: [PATCH 0256/1764] improve track extrapolation to vtx wo MCS correction (#14189) --- .../include/MCHTracking/TrackExtrap.h | 16 +++++++++----- .../MUON/MCH/Tracking/src/TrackExtrap.cxx | 21 ++++++++++++++++++- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/Detectors/MUON/MCH/Tracking/include/MCHTracking/TrackExtrap.h b/Detectors/MUON/MCH/Tracking/include/MCHTracking/TrackExtrap.h index 709d2c991a1be..37b3cafb90523 100644 --- a/Detectors/MUON/MCH/Tracking/include/MCHTracking/TrackExtrap.h +++ b/Detectors/MUON/MCH/Tracking/include/MCHTracking/TrackExtrap.h @@ -18,6 +18,7 @@ #define O2_MCH_TRACKEXTRAP_H_ #include +#include #include @@ -70,17 +71,21 @@ class TrackExtrap /// Add branson correction resolution to parameter covariances return extrapToVertex(trackParam, xVtx, yVtx, zVtx, errXVtx, errYVtx, true, false); } - static bool extrapToVertexWithoutBranson(TrackParam& trackParam, double zVtx) + static bool extrapToVertexWithoutBranson(TrackParam& trackParam, double zVtx, + double xUpstream = 0., double yUpstream = 0., + std::optional zUpstream = std::nullopt) { /// Extrapolate track parameters to vertex, corrected for energy loss effects only /// Add dispersion due to multiple scattering and energy loss fluctuation to parameter covariances - return 
extrapToVertex(trackParam, 0., 0., zVtx, 0., 0., false, true); + return extrapToVertex(trackParam, 0., 0., zVtx, 0., 0., false, true, xUpstream, yUpstream, zUpstream); } - static bool extrapToVertexUncorrected(TrackParam& trackParam, double zVtx) + static bool extrapToVertexUncorrected(TrackParam& trackParam, double zVtx, + double xUpstream = 0., double yUpstream = 0., + std::optional zUpstream = std::nullopt) { /// Extrapolate track parameters to vertex without multiple scattering and energy loss corrections /// Add dispersion due to multiple scattering to parameter covariances - return extrapToVertex(trackParam, 0., 0., zVtx, 0., 0., false, false); + return extrapToVertex(trackParam, 0., 0., zVtx, 0., 0., false, false, xUpstream, yUpstream, zUpstream); } static bool extrapToMID(TrackParam& trackParam); @@ -92,7 +97,8 @@ class TrackExtrap private: static bool extrapToVertex(TrackParam& trackParam, double xVtx, double yVtx, double zVtx, - double errXVtx, double errYVtx, bool correctForMCS, bool correctForEnergyLoss); + double errXVtx, double errYVtx, bool correctForMCS, bool correctForEnergyLoss, + double xUpstream = 0., double yUpstream = 0., std::optional zUpstream = std::nullopt); static bool getAbsorberCorrectionParam(double trackXYZIn[3], double trackXYZOut[3], double pTotal, double& pathLength, double& f0, double& f1, double& f2, diff --git a/Detectors/MUON/MCH/Tracking/src/TrackExtrap.cxx b/Detectors/MUON/MCH/Tracking/src/TrackExtrap.cxx index a91d42aca0f3b..09b8d6d3b330f 100644 --- a/Detectors/MUON/MCH/Tracking/src/TrackExtrap.cxx +++ b/Detectors/MUON/MCH/Tracking/src/TrackExtrap.cxx @@ -276,7 +276,8 @@ bool TrackExtrap::extrapToMID(TrackParam& trackParam) //__________________________________________________________________________ bool TrackExtrap::extrapToVertex(TrackParam& trackParam, double xVtx, double yVtx, double zVtx, - double errXVtx, double errYVtx, bool correctForMCS, bool correctForEnergyLoss) + double errXVtx, double errYVtx, bool 
correctForMCS, bool correctForEnergyLoss, + double xUpstream, double yUpstream, std::optional zUpstream) { /// Main method for extrapolation to the vertex: /// Returns the track parameters and covariances resulting from the extrapolation of the current trackParam @@ -285,6 +286,8 @@ bool TrackExtrap::extrapToVertex(TrackParam& trackParam, double xVtx, double yVt /// if correctForMCS=false: add parameter dispersion due to MCS in parameter covariances /// if correctForEnergyLoss=true: correct parameters for energy loss and add energy loss fluctuation to covariances /// if correctForEnergyLoss=false: do nothing about energy loss + /// In case correctForMCS=false and the position of the track upstream the absorber is provided, it is used + /// to compute the absorber correction parameters, instead of the extrapolated track position from downstream if (trackParam.getZ() == zVtx) { return true; // nothing to be done if already at vertex @@ -301,6 +304,18 @@ bool TrackExtrap::extrapToVertex(TrackParam& trackParam, double xVtx, double yVt } } + // check the upstream track position with respect to the absorber if provided and used (spectro z<0) + // zUpstream must be >= SAbsZBeg with 100 µm tolerance to account for numerical precision + if (!correctForMCS && zUpstream && *zUpstream < SAbsZBeg - 0.01) { + if (*zUpstream < SAbsZEnd) { + LOG(warning) << "Upstream Z (" << *zUpstream << ") downstream the front absorber (zAbsorberEnd = " << SAbsZEnd << ")"; + return false; + } else { + LOG(warning) << "Upstream Z (" << *zUpstream << ") inside the front absorber (" << SAbsZBeg << ", " << SAbsZEnd << ")"; + return false; + } + } + // Check the track position with respect to the vertex and the absorber (spectro z<0) if (trackParam.getZ() > SAbsZEnd) { if (trackParam.getZ() > zVtx) { @@ -328,6 +343,10 @@ bool TrackExtrap::extrapToVertex(TrackParam& trackParam, double xVtx, double yVt trackXYZIn[2] = SAbsZBeg; trackXYZIn[0] = trackXYZOut[0] + (xVtx - trackXYZOut[0]) / (zVtx - 
trackXYZOut[2]) * (trackXYZIn[2] - trackXYZOut[2]); trackXYZIn[1] = trackXYZOut[1] + (yVtx - trackXYZOut[1]) / (zVtx - trackXYZOut[2]) * (trackXYZIn[2] - trackXYZOut[2]); + } else if (zUpstream) { // or linear propagation to the upstream track position + trackXYZIn[2] = SAbsZBeg; + trackXYZIn[0] = trackXYZOut[0] + (xUpstream - trackXYZOut[0]) / (*zUpstream - trackXYZOut[2]) * (trackXYZIn[2] - trackXYZOut[2]); + trackXYZIn[1] = trackXYZOut[1] + (yUpstream - trackXYZOut[1]) / (*zUpstream - trackXYZOut[2]) * (trackXYZIn[2] - trackXYZOut[2]); } else { // or standard propagation without vertex constraint TrackParam trackParamIn(trackParam); if (!extrapToZ(trackParamIn, SAbsZBeg)) { From 8c94458e83debdbc773d4e1a57923a6e63733c2d Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 17 Apr 2025 20:36:15 +0200 Subject: [PATCH 0257/1764] Saner publishing period for FLP case (#14198) --- Framework/Core/src/CommonServices.cxx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index 5c333bbb85e3b..22324cd84b390 100644 --- a/Framework/Core/src/CommonServices.cxx +++ b/Framework/Core/src/CommonServices.cxx @@ -891,6 +891,11 @@ o2::framework::ServiceSpec CommonServices::dataProcessingStats() if (!DefaultsHelpers::onlineDeploymentMode() && DefaultsHelpers::deploymentMode() != DeploymentMode::FST) { arrowAndResourceLimitingMetrics = true; } + + int64_t consumedTimeframesPublishInterval = 0; + if (DefaultsHelpers::deploymentMode() == DeploymentMode::OnlineECS) { + consumedTimeframesPublishInterval = 5000; + } // Input proxies should not report cpu_usage_fraction, // because of the rate limiting which biases the measurement. 
auto& spec = services.get(); @@ -950,7 +955,7 @@ o2::framework::ServiceSpec CommonServices::dataProcessingStats() MetricSpec{.name = "consumed-timeframes", .metricId = (int)ProcessingStatsId::CONSUMED_TIMEFRAMES, .kind = Kind::UInt64, - .minPublishInterval = 0, + .minPublishInterval = consumedTimeframesPublishInterval, .maxRefreshLatency = quickRefreshInterval, .sendInitialValue = true}, MetricSpec{.name = "min_input_latency_ms", From b856a634468cfe8e80890ce3d6b77fb7bbba9dd6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 17 Apr 2025 15:52:54 +0200 Subject: [PATCH 0258/1764] GPU CMake: Fix compile flags, particularly HIP deterministic mode was missing -ffp-contract=off --- dependencies/FindO2GPU.cmake | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 95db55041184f..d50705d106bf3 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -162,7 +162,7 @@ if(ENABLE_CUDA) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math --ftz=true") + string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math ${GPUCA_CUDA_DENORMALS_FLAGS}") endif() if(CMAKE_CXX_FLAGS MATCHES "(^| )-Werror( |$)") string(APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call") @@ -293,11 +293,10 @@ if(ENABLE_HIP) if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE) string(APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage") endif() - if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) - string(APPEND CMAKE_HIP_FLAGS " -ffast-math") - endif() - if(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -O3") + 
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) + string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") + string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -ffast-math -O3") endif() string(REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}") # ROCm currently doesn’t support integrated graphics if(HIP_AMDGPUTARGET) From fc52b5db290f2ebeced93820c8de20379f9f3283 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 16 Apr 2025 18:44:10 +0200 Subject: [PATCH 0259/1764] GPU TPC: Change some code in sector tracking from prepreccor #if to if constexpr, to avoid preprocessing the RTC code --- .../SectorTracker/GPUTPCNeighboursFinder.cxx | 153 +++++++++--------- .../SectorTracker/GPUTPCNeighboursFinder.h | 2 - .../SectorTracker/GPUTPCStartHitsFinder.cxx | 50 +++--- .../SectorTracker/GPUTPCTrackletSelector.cxx | 48 +++--- .../SectorTracker/GPUTPCTrackletSelector.h | 2 - 5 files changed, 127 insertions(+), 128 deletions(-) diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index d76c079bb406f..4cdb4d151eba4 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -75,14 +75,10 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh return; } -#define UnrollGlobal 4 -#define MaxShared GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP -#if MaxShared < GPUCA_MAXN -#define MaxGlobal ((GPUCA_MAXN - MaxShared - 1) / UnrollGlobal + 1) * UnrollGlobal -#else -#define MaxGlobal 0 -#endif -#define MaxTotal MaxShared + MaxGlobal + static constexpr uint32_t UNROLL_GLOBAL = 4; + static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP; + static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < GPUCA_MAXN) ? 
(((GPUCA_MAXN - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0; + static constexpr uint32_t MAX_TOTAL = MAX_SHARED + MAX_GLOBAL; const float chi2Cut = 3.f * 3.f * 4 * (s.mUpDx * s.mUpDx + s.mDnDx * s.mDnDx); // float chi2Cut = 3.f*3.f*(s.mUpDx*s.mUpDx + s.mDnDx*s.mDnDx ); //SG @@ -117,10 +113,8 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh const float kAreaSlopeZUp = kAngularMultiplier != 0.f ? 1.f : s.mUpTx; const float kAreaSlopeZDn = kAngularMultiplier != 0.f ? 1.f : s.mDnTx; -#if MaxGlobal > 0 - calink neighUp[MaxGlobal]; - float yzUp[2 * MaxGlobal]; -#endif + calink neighUp[MAX_GLOBAL]; + float yzUp[2 * MAX_GLOBAL]; for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) { @@ -128,7 +122,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh const float y = y0 + hitData.x * stepY; const float z = z0 + hitData.y * stepZ; - int32_t nNeighUp = 0; + uint32_t nNeighUp = 0; float minZ, maxZ, minY, maxY; int32_t binYmin, binYmax, binZmin, binZmax; int32_t nY; @@ -145,11 +139,11 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh nY = rowUp.Grid().Ny(); } - for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MaxTotal); k1++) { + for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MAX_TOTAL); k1++) { int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmin]; int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmax + 1]; GPUCA_UNROLL(U(4), U(2)) - for (int32_t i = iMin; i < iMax && (nNeighUp < MaxTotal); i++) { + for (int32_t i = iMin; i < iMax && (nNeighUp < MAX_TOTAL); i++) { const GPUglobalref() cahit2& hitDataUp = pHitData[lHitNumberOffsetUp + i]; GPUTPCHit h; h.mY = y0Up + (hitDataUp.x) * stepYUp; @@ -159,51 +153,48 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh continue; } -#if MaxGlobal > 0 -#if MaxShared == 0 - if (true) { -#else - if (nNeighUp >= MaxShared) { -#endif - 
neighUp[nNeighUp - MaxShared] = (calink)i; - yzUp[2 * (nNeighUp - MaxShared)] = s.mDnDx * (h.Y() - y); - yzUp[2 * (nNeighUp - MaxShared) + 1] = s.mDnDx * (h.Z() - z); - } else -#endif - { -#if MaxShared > 0 - s.mB[nNeighUp][iThread] = (calink)i; - s.mA1[nNeighUp][iThread] = s.mDnDx * (h.Y() - y); - s.mA2[nNeighUp][iThread] = s.mDnDx * (h.Z() - z); -#endif + const bool inGlobal = nNeighUp >= MAX_SHARED; + if constexpr (MAX_GLOBAL > 0) { + if (inGlobal) { + neighUp[nNeighUp - MAX_SHARED] = (calink)i; + yzUp[2 * (nNeighUp - MAX_SHARED)] = s.mDnDx * (h.Y() - y); + yzUp[2 * (nNeighUp - MAX_SHARED) + 1] = s.mDnDx * (h.Z() - z); + } + } + if constexpr (MAX_SHARED > 0) { + if (!inGlobal) { + s.mB[nNeighUp][iThread] = (calink)i; + s.mA1[nNeighUp][iThread] = s.mDnDx * (h.Y() - y); + s.mA2[nNeighUp][iThread] = s.mDnDx * (h.Z() - z); + } } nNeighUp++; } } -#if MaxShared > 0 // init a rest of the shared array - for (int32_t iUp = nNeighUp; iUp < MaxShared; iUp++) { - s.mA1[iUp][iThread] = -1.e10f; - s.mA2[iUp][iThread] = -1.e10f; - s.mB[iUp][iThread] = (calink)-1; + if constexpr (MAX_SHARED > 0) { // init the rest of the shared array + for (uint32_t iUp = nNeighUp; iUp < MAX_SHARED; iUp++) { + s.mA1[iUp][iThread] = -1.e10f; + s.mA2[iUp][iThread] = -1.e10f; + s.mB[iUp][iThread] = (calink)-1; + } } -#endif -#if MaxGlobal > 0 // init a rest of the UnrollGlobal chunk of the global array - int32_t Nrest = nNeighUp - MaxShared; - int32_t N4 = (Nrest / UnrollGlobal) * UnrollGlobal; - if (N4 < Nrest) { - N4 += UnrollGlobal; - GPUCA_UNROLL(U(UnrollGlobal - 1), U(UnrollGlobal - 1)) - for (int32_t k = 0; k < UnrollGlobal - 1; k++) { - if (Nrest + k < N4) { - yzUp[2 * (Nrest + k)] = -1.e10f; - yzUp[2 * (Nrest + k) + 1] = -1.e10f; - neighUp[Nrest + k] = (calink)-1; + const uint32_t Nrest = nNeighUp - MAX_SHARED; + uint32_t N4 = (Nrest / UNROLL_GLOBAL) * UNROLL_GLOBAL; + if constexpr (MAX_GLOBAL > 0) { // init the rest of the UNROLL_GLOBAL chunk of the global array + if (nNeighUp > 
MAX_SHARED && N4 < Nrest) { + N4 += UNROLL_GLOBAL; + GPUCA_UNROLL(U(UNROLL_GLOBAL - 1), U(UNROLL_GLOBAL - 1)) + for (uint32_t k = 0; k + 1 < UNROLL_GLOBAL; k++) { + if (Nrest + k < N4) { + yzUp[2 * (Nrest + k)] = -1.e10f; + yzUp[2 * (Nrest + k) + 1] = -1.e10f; + neighUp[Nrest + k] = (calink)-1; + } } } } -#endif { // area in the lower row const float yy = y * s.mDnTx; @@ -236,47 +227,49 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh float yDnProjUp = s.mUpDx * (yDn - y); float zDnProjUp = s.mUpDx * (zDn - z); -#if MaxShared > 0 - GPUCA_UNROLL(U(MaxShared), U(MaxShared)) - for (int32_t iUp = 0; iUp < MaxShared; iUp++) { - const float dy = yDnProjUp - s.mA1[iUp][iThread]; - const float dz = zDnProjUp - s.mA2[iUp][iThread]; - const float d = dy * dy + dz * dz; - if (d < bestD) { - bestD = d; - linkDn = i; - linkUp = iUp; - } - } -#endif - -#if MaxGlobal > 0 - for (int32_t iUp = 0; iUp < N4; iUp += UnrollGlobal) { - GPUCA_UNROLL(U(UnrollGlobal), U(UnrollGlobal)) - for (int32_t k = 0; k < UnrollGlobal; k++) { - int32_t jUp = iUp + k; - const float dy = yDnProjUp - yzUp[2 * jUp]; - const float dz = zDnProjUp - yzUp[2 * jUp + 1]; + if constexpr (MAX_SHARED > 0) { + GPUCA_UNROLL(U(MAX_SHARED), U(MAX_SHARED)) + for (uint32_t iUp = 0; iUp < MAX_SHARED; iUp++) { + const float dy = yDnProjUp - s.mA1[iUp][iThread]; + const float dz = zDnProjUp - s.mA2[iUp][iThread]; const float d = dy * dy + dz * dz; if (d < bestD) { bestD = d; linkDn = i; - linkUp = MaxShared + jUp; + linkUp = iUp; + } + } + } + + if constexpr (MAX_GLOBAL > 0) { + if (nNeighUp > MAX_SHARED) { + for (uint32_t iUp = 0; iUp < N4; iUp += UNROLL_GLOBAL) { + GPUCA_UNROLL(U(UNROLL_GLOBAL), U(UNROLL_GLOBAL)) + for (uint32_t k = 0; k < UNROLL_GLOBAL; k++) { + const uint32_t jUp = iUp + k; + const float dy = yDnProjUp - yzUp[2 * jUp]; + const float dz = zDnProjUp - yzUp[2 * jUp + 1]; + const float d = dy * dy + dz * dz; + if (d < bestD) { + bestD = d; + linkDn = i; + linkUp = 
MAX_SHARED + jUp; + } + } } } } -#endif } } if (linkUp >= 0) { -#if MaxShared > 0 && MaxGlobal > 0 - linkUp = (linkUp >= MaxShared) ? neighUp[linkUp - MaxShared] : s.mB[linkUp][iThread]; -#elif MaxShared > 0 - linkUp = s.mB[linkUp][iThread]; -#else - linkUp = neighUp[linkUp]; -#endif + if constexpr (MAX_SHARED > 0 && MAX_GLOBAL > 0) { + linkUp = ((uint32_t)linkUp >= MAX_SHARED) ? neighUp[linkUp - MAX_SHARED] : s.mB[linkUp][iThread]; + } else if constexpr (MAX_SHARED > 0) { + linkUp = s.mB[linkUp][iThread]; + } else { + linkUp = neighUp[linkUp]; + } } tracker.mData.mLinkUpData[lHitNumberOffset + ih] = linkUp; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 0ecd230a67415..6bdc637b6bad6 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -40,12 +40,10 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate int32_t mIRow; // row number int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number -#if GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 static_assert(GPUCA_MAXN >= GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP); float mA1[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; float mA2[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; calink mB[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; -#endif GPUTPCRow mRow, mRowUp, mRowDown; }; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx index 20dfd69864816..06dac4a68c540 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx @@ -39,36 +39,38 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr uint32_t linkUpData = 
tracker.mData.mLinkUpData[lHitNumberOffset + ih]; if (tracker.mData.mLinkDownData[lHitNumberOffset + ih] == CALINK_INVAL && linkUpData != CALINK_INVAL && tracker.mData.mLinkUpData[rowUp.mHitNumberOffset + linkUpData] != CALINK_INVAL) { -#if GPUCA_PAR_SORT_STARTHITS > 0 - GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; - uint32_t nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); - if (nextRowStartHits >= tracker.mNMaxRowStartHits) { - tracker.raiseError(GPUErrors::ERROR_ROWSTARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxRowStartHits); - CAMath::AtomicExchShared(&s.mNRowStartHits, tracker.mNMaxRowStartHits); - break; + GPUglobalref() GPUTPCHitId* GPUrestrict() startHits; + uint32_t nextRowStartHits; + if constexpr (GPUCA_PAR_SORT_STARTHITS > 0) { + startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; + nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); + if (nextRowStartHits >= tracker.mNMaxRowStartHits) { + tracker.raiseError(GPUErrors::ERROR_ROWSTARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxRowStartHits); + CAMath::AtomicExchShared(&s.mNRowStartHits, tracker.mNMaxRowStartHits); + break; + } + } else { + startHits = tracker.mTrackletStartHits; + nextRowStartHits = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, 1u); + if (nextRowStartHits >= tracker.mNMaxStartHits) { + tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxStartHits); + CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); + break; + } } -#else - GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletStartHits; - uint32_t nextRowStartHits = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, 1u); - if (nextRowStartHits >= tracker.mNMaxStartHits) { - 
tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxStartHits); - CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); - break; - } -#endif startHits[nextRowStartHits].Set(s.mIRow, ih); } } GPUbarrier(); -#if GPUCA_PAR_SORT_STARTHITS > 0 - if (iThread == 0) { - uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); - tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; - if (nOffset + s.mNRowStartHits > tracker.mNMaxStartHits) { - tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nOffset + s.mNRowStartHits, tracker.mNMaxStartHits); - CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); + if constexpr (GPUCA_PAR_SORT_STARTHITS > 0) { + if (iThread == 0) { + uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); + tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; + if (nOffset + s.mNRowStartHits > tracker.mNMaxStartHits) { + tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nOffset + s.mNRowStartHits, tracker.mNMaxStartHits); + CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); + } } } -#endif } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx index e27a8f66ae754..0bf3448bed730 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx @@ -48,11 +48,11 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread int32_t irow = firstRow; - int32_t gap = 0; - int32_t nShared = 0; - int32_t nHits = 0; - const int32_t minHits = tracker.Param().rec.tpc.minNClustersTrackSeed == -1 ? 
GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(tracklet.Param().QPt() * tracker.Param().qptB5Scaler) : tracker.Param().rec.tpc.minNClustersTrackSeed; - const int32_t sharingMinNorm = minHits * tracker.Param().rec.tpc.trackletMinSharedNormFactor; + uint32_t gap = 0; + uint32_t nShared = 0; + uint32_t nHits = 0; + const uint32_t minHits = tracker.Param().rec.tpc.minNClustersTrackSeed == -1 ? GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(tracklet.Param().QPt() * tracker.Param().qptB5Scaler) : tracker.Param().rec.tpc.minNClustersTrackSeed; + const uint32_t sharingMinNorm = minHits * tracker.Param().rec.tpc.trackletMinSharedNormFactor; float maxShared = maxSharedFrac * sharingMinNorm; GPUCA_UNROLL(, U(1)) @@ -63,16 +63,20 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread } if (ih != CALINK_INVAL && ih != CALINK_DEAD_CHANNEL) { GPUglobalref() const GPUTPCRow& row = tracker.Row(irow); - bool own = (tracker.HitWeight(row, ih) <= w); - bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); + const bool own = (tracker.HitWeight(row, ih) <= w); + const bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); if (own || sharedOK) { // SG!!! 
gap = 0; -#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (nHits < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { - s.mHits[nHits][iThread].Set(irow, ih); - } else -#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + const bool inShared = nHits < (uint32_t)GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE; +#pragma GCC diagnostic pop + if constexpr (GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE > 0) { + if (inShared) { + s.mHits[nHits][iThread].Set(irow, ih); + } + } + if (!inShared) { trackHits[nHits - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE].Set(irow, ih); } nHits++; @@ -100,13 +104,17 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread tracker.Tracks()[itrout].SetParam(tracklet.Param()); tracker.Tracks()[itrout].SetFirstHitID(nFirstTrackHit); tracker.Tracks()[itrout].SetNHits(nHits); - for (int32_t jh = 0; jh < nHits; jh++) { -#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (jh < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { - tracker.TrackHits()[nFirstTrackHit + jh] = s.mHits[jh][iThread]; - } else -#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - { + for (uint32_t jh = 0; jh < nHits; jh++) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + const bool inShared = jh < (uint32_t)GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE; +#pragma GCC diagnostic pop + if constexpr (GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE > 0) { + if (inShared) { + tracker.TrackHits()[nFirstTrackHit + jh] = s.mHits[jh][iThread]; + } + } + if (!inShared) { tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; } } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index e5a28c80f37f9..070e02fad8222 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -36,10 +36,8 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate int32_t mNThreadsTotal; // total n threads int32_t mNTracklets; // n of tracklets int32_t mReserved; // for alignment reasons -#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 static_assert(GPUCA_ROW_COUNT >= GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE); GPUTPCHitId mHits[GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; -#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; typedef GPUconstantref() GPUTPCTracker processorType; From e966e71fcd2033de0b95fae6d1e437381b4b9a1a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 17 Apr 2025 10:10:11 +0200 Subject: [PATCH 0260/1764] GPU: Add missing static_assert to guarantee correct unroll factor, will crash otherwise (apparently forgotten by AMD) --- GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index 4cdb4d151eba4..6731fed55cc9c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -76,6 +76,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } static constexpr uint32_t UNROLL_GLOBAL = 4; + static_assert(GPUCA_MAXN % UNROLL_GLOBAL == 0); static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP; static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < GPUCA_MAXN) ? 
(((GPUCA_MAXN - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0; static constexpr uint32_t MAX_TOTAL = MAX_SHARED + MAX_GLOBAL; From 2ab600016e4a20b2e0dfea2cae94105dab3ca942 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 17 Apr 2025 13:40:07 +0200 Subject: [PATCH 0261/1764] GPU: Make some of the optimizations AMD did for the neighbors finder for MI50 optional --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 6 ++++- .../Definitions/GPUDefParametersDefaults.h | 12 +++++++++ GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.cxx | 27 ++++++++++--------- GPU/GPUTracking/kernels.cmake | 2 ++ 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index b4dac39ae1cd2..acca74e57a80e 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -263,7 +263,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } if (mProcessingSettings.deterministicGPUReconstruction) { #ifndef GPUCA_DETERMINISTIC_MODE - GPUError("Warning, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); + GPUError("WARNING, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; @@ -274,6 +274,10 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings.createO2Output = 1; } mProcessingSettings.rtc.deterministic = 1; + } else { +#ifdef GPUCA_DETERMINISTIC_MODE + GPUError("WARNING, compiled with GPUCA_DETERMINISTIC_MODE but deterministicGPUReconstruction not set, only compile-time determinism and 
deterministic math enforced, not fully deterministic!"); +#endif } if (mProcessingSettings.deterministicGPUReconstruction && mProcessingSettings.debugLevel >= 6) { mProcessingSettings.nTPCClustererLanes = 1; diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index b212abbcd2707..4ee6b23d46b51 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -516,6 +516,12 @@ #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 4 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 1 + #endif #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 12 #endif @@ -544,6 +550,12 @@ #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 0 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 0 + #endif #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 0 #endif diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 34fac6514851c..9d1772379f6bd 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -284,7 +284,7 @@ AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for m AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug output dumps to file") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify 
failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") -AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6") +AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6", def(1)) AddOption(showOutputStat, bool, false, "", 0, "Print some track output statistics") AddOption(runCompressionStatistics, bool, false, "compressionStat", 0, "Run statistics and verification for cluster compression") AddOption(resetTimers, int8_t, 1, "", 0, "Reset timers every event") diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index 6731fed55cc9c..54ce7f12c655f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -75,7 +75,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh return; } - static constexpr uint32_t UNROLL_GLOBAL = 4; + static constexpr uint32_t UNROLL_GLOBAL = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL > 1 ? GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL : 1; static_assert(GPUCA_MAXN % UNROLL_GLOBAL == 0); static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP; static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < GPUCA_MAXN) ? 
(((GPUCA_MAXN - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0; @@ -173,7 +173,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } } - if constexpr (MAX_SHARED > 0) { // init the rest of the shared array + if constexpr (MAX_SHARED > 0 && GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED) { // init the rest of the shared array for (uint32_t iUp = nNeighUp; iUp < MAX_SHARED; iUp++) { s.mA1[iUp][iThread] = -1.e10f; s.mA2[iUp][iThread] = -1.e10f; @@ -181,17 +181,17 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } } - const uint32_t Nrest = nNeighUp - MAX_SHARED; - uint32_t N4 = (Nrest / UNROLL_GLOBAL) * UNROLL_GLOBAL; - if constexpr (MAX_GLOBAL > 0) { // init the rest of the UNROLL_GLOBAL chunk of the global array - if (nNeighUp > MAX_SHARED && N4 < Nrest) { - N4 += UNROLL_GLOBAL; + const uint32_t nRest = nNeighUp - MAX_SHARED; + uint32_t nRestUnrolled = (nRest / UNROLL_GLOBAL) * UNROLL_GLOBAL; + if constexpr (MAX_GLOBAL > 1) { // init the rest of the UNROLL_GLOBAL chunk of the global array + if (nNeighUp > MAX_SHARED && nRestUnrolled < nRest) { + nRestUnrolled += UNROLL_GLOBAL; GPUCA_UNROLL(U(UNROLL_GLOBAL - 1), U(UNROLL_GLOBAL - 1)) for (uint32_t k = 0; k + 1 < UNROLL_GLOBAL; k++) { - if (Nrest + k < N4) { - yzUp[2 * (Nrest + k)] = -1.e10f; - yzUp[2 * (Nrest + k) + 1] = -1.e10f; - neighUp[Nrest + k] = (calink)-1; + if (nRest + k < nRestUnrolled) { + yzUp[2 * (nRest + k)] = -1.e10f; + yzUp[2 * (nRest + k) + 1] = -1.e10f; + neighUp[nRest + k] = (calink)-1; } } } @@ -229,8 +229,9 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh float zDnProjUp = s.mUpDx * (zDn - z); if constexpr (MAX_SHARED > 0) { + const uint32_t maxSharedUp = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED ? 
MAX_SHARED : CAMath::Min(nNeighUp, MAX_SHARED); GPUCA_UNROLL(U(MAX_SHARED), U(MAX_SHARED)) - for (uint32_t iUp = 0; iUp < MAX_SHARED; iUp++) { + for (uint32_t iUp = 0; iUp < maxSharedUp; iUp++) { const float dy = yDnProjUp - s.mA1[iUp][iThread]; const float dz = zDnProjUp - s.mA2[iUp][iThread]; const float d = dy * dy + dz * dz; @@ -244,7 +245,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh if constexpr (MAX_GLOBAL > 0) { if (nNeighUp > MAX_SHARED) { - for (uint32_t iUp = 0; iUp < N4; iUp += UNROLL_GLOBAL) { + for (uint32_t iUp = 0; iUp < nRestUnrolled; iUp += UNROLL_GLOBAL) { GPUCA_UNROLL(U(UNROLL_GLOBAL), U(UNROLL_GLOBAL)) for (uint32_t k = 0; k < UNROLL_GLOBAL; k++) { const uint32_t jUp = iUp + k; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index ee3af2b87d925..fcf576d828b7f 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -136,6 +136,8 @@ o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALR o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP + NEIGHBOURS_FINDER_UNROLL_GLOBAL + NEIGHBOURS_FINDER_UNROLL_SHARED TRACKLET_SELECTOR_HITS_REG_SIZE ALTERNATE_BORDER_SORT SORT_BEFORE_FIT From 233a4e4bf1f5ea05c218598c05ba734909d3155f Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 18 Apr 2025 09:32:40 +0200 Subject: [PATCH 0262/1764] Common: DCAFitter explanation to README --- Common/DCAFitter/README.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Common/DCAFitter/README.md b/Common/DCAFitter/README.md index 1699ffb4f8aca..e385378d10caf 100644 --- a/Common/DCAFitter/README.md +++ b/Common/DCAFitter/README.md @@ -93,3 +93,41 @@ In this case the relevant correlation coefficient of the cov.matrix is redefined `DCAFitterN::setBadCovPolicy(DCAFitterN::OverrideAnFlag);` continue fit with overridden cov.matrix but set the 
propagation failure flag (can be checked using the same `isPropagationFailure(int cand = 0)` method). +## Fit status +The fitter provides a fit status for each candidate, which can be retrieved using: +``` +FitStatus status = ft.getFitStatus(int cand = 0); +``` +The possible values are: +``` +enum FitStatus : uint8_t { // part of the DCAFitterN class + None, // no status set (should not be possible!) + + /* Good Conditions */ + Converged, // fit converged + MaxIter, // max iterations reached before fit convergence (can still be a good vertex) + + /* Error Conditions */ + NoCrossing, // no reasonable crossing was found + RejRadius, // radius of crossing was not acceptable + RejTrackX, // one candidate track x was below the minimum required radius + RejTrackRoughZ, // rejected by rough cut on tracks Z difference + RejChi2Max, // rejected by maximum chi2 cut + FailProp, // propagation of at least one prong to PCA failed + FailInvCov, // inversion of cov.-matrix failed + FailInvWeight, // inversion of Ti weight matrix failed + FailInv2ndDeriv, // inversion of 2nd derivatives failed + FailCorrTracks, // correction of tracks to updated x failed + FailCloserAlt, // alternative PCA is closer +}; +``` +This allows tracking where a candidate fit was abandoned. +``` +int nc = ft.process(tr0,tr1,tr2); +auto status = ft.getFitStatus(); +if (nc) { + // status can either be FitStatus::Converged or FitStatus::MaxIter +} +// status can be one of the error conditions +``` +A more thorough example is given in `testDCAFitterN.cxx`. 
From 6963217343a8c835231e1494eae7508ac1badb74 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 10 Apr 2025 10:59:24 +0200 Subject: [PATCH 0263/1764] Fix for TPC edge clusters in CTF decoding --- .../DataCompression/GPUTPCDecompressionKernels.cxx | 13 +++++++++++++ .../DataCompression/TPCClusterDecompressor.cxx | 13 +++++++++++++ GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 3 files changed, 27 insertions(+) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx index ee1a9c97cc30b..68e45f0c08c32 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx @@ -57,6 +57,19 @@ GPUdii() void GPUTPCDecompressionKernels::Thread= decompressor.mInputGPU.nSliceRows) ? 0 : decompressor.mInputGPU.nSliceRowClusters[linearIndex]); TPCClusterDecompressionCore::decompressHits(cmprClusters, offsets[linearIndex], end, clout); + if (processors.param.rec.tpc.clustersEdgeFixDistance > 0.f) { + constexpr GPUTPCGeometry geo; + for (uint32_t k = 0; k < outputAccess->nClusters[iSector][iRow]; k++) { + auto& cluster = buffer[k]; + if (cluster.getFlags() & ClusterNative::flagEdge) { + auto padF = cluster.getPad(); + float distEdge = padF < geo.NPads(iRow) / 2 ? 
padF : geo.NPads(iRow) - 1 - padF; + if (distEdge > processors.param.rec.tpc.clustersEdgeFixDistance) { + cluster.setFlags(cluster.getFlags() ^ ClusterNative::flagEdge); + } + } + } + } if (processors.param.rec.tpc.clustersShiftTimebins != 0.f) { for (uint32_t k = 0; k < outputAccess->nClusters[iSector][iRow]; k++) { auto& cl = buffer[k]; diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx index 296a203cf070b..cd1717faf178d 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx @@ -94,6 +94,19 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom ClusterNative* clout = buffer + clusters[i][j].size(); uint32_t end = offsets[i][j] + ((i * GPUCA_ROW_COUNT + j >= clustersCompressed->nSliceRows) ? 0 : clustersCompressed->nSliceRowClusters[i * GPUCA_ROW_COUNT + j]); TPCClusterDecompressionCore::decompressHits(*clustersCompressed, offsets[i][j], end, clout); + if (param.rec.tpc.clustersEdgeFixDistance > 0.f) { + constexpr GPUTPCGeometry geo; + for (uint32_t k = 0; k < clustersNative.nClusters[i][j]; k++) { + auto& cluster = buffer[k]; + if (cluster.getFlags() & ClusterNative::flagEdge) { + auto padF = cluster.getPad(); + float distEdge = padF < geo.NPads(j) / 2 ? 
padF : geo.NPads(j) - 1 - padF; + if (distEdge > param.rec.tpc.clustersEdgeFixDistance) { + cluster.setFlags(cluster.getFlags() ^ ClusterNative::flagEdge); + } + } + } + } if (param.rec.tpc.clustersShiftTimebins != 0.f) { for (uint32_t k = 0; k < clustersNative.nClusters[i][j]; k++) { auto& cl = buffer[k]; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9d1772379f6bd..6858889f9a603 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -72,6 +72,7 @@ AddOptionRTC(tubeChi2, float, 5.f * 5.f, "", 0, "Max chi2 to mark cluster adjace AddOptionRTC(tubeMaxSize2, float, 2.5f * 2.5f, "", 0, "Square of max tube size (normally derrived from tpcTubeChi2)") AddOptionRTC(clustersShiftTimebins, float, 0, "", 0, "Shift of TPC clusters (applied during CTF cluster decoding)") AddOptionRTC(clustersShiftTimebinsClusterizer, float, 0, "", 0, "Shift of TPC clusters (applied during CTF clusterization)") +AddOptionRTC(clustersEdgeFixDistance, float, 0.f, "", 0, "If >0, revert cluster.flag edge bit distance to edge exceeds this parameter (fixed during CTF decoding)") AddOptionRTC(defaultZOffsetOverR, float, 0.5210953f, "", 0, "Shift of TPC clusters (applied during CTF cluster decoding)") AddOptionRTC(PID_EKrangeMin, float, 0.47f, "", 0, "min P of electron/K BB bands crossing") AddOptionRTC(PID_EKrangeMax, float, 0.57f, "", 0, "max P of electron/K BB bands crossing") From 186e9142adfd224096e1c84559a1a9d53c94bc87 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Sat, 19 Apr 2025 11:07:02 +0200 Subject: [PATCH 0264/1764] ctpdev: consistency checker fixes (#14202) * fix * clang --------- Co-authored-by: Roman Lietava --- Detectors/CTP/reconstruction/src/RawDataDecoder.cxx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index 
faa3dbac3e934..fa7fd673c7e85 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -636,8 +636,13 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, for (auto const& digit : digits) { // if class mask => inps for (int i = 0; i < digit.CTPClassMask.size(); i++) { - if (digit.CTPClassMask[i]) { + if (digit.CTPClassMask[i] & trgclassmask) { const CTPClass* cls = mCTPConfig.getCTPClassFromHWIndex(i); + if (cls == nullptr) { + LOG(error) << "Class mask index not found in CTP config:" << i; + ret = 128; + continue; + } uint64_t clsinpmask = cls->descriptor->getInputsMask(); uint64_t diginpmask = digit.CTPInputMask.to_ullong(); if (!((clsinpmask & diginpmask) == clsinpmask)) { From fc8f84f350a60ef31a78e18310f1b1be84dd1137 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 18 Apr 2025 19:55:43 +0200 Subject: [PATCH 0265/1764] GPU RTC: Don't pass a third launch bounds parameter --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu index 571428dc39e21..3e4d3113fb995 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu @@ -15,7 +15,7 @@ #define GPUCA_GPUCODE_HOSTONLY #define GPUCA_GPUCODE_NO_LAUNCH_BOUNDS -#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args)) +#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) #include "GPUReconstructionCUDAIncludesSystem.h" #include "GPUReconstructionCUDADef.h" From 1de28ad9206a381bcfc569c8f0c1675ada63db65 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 18 Apr 2025 22:51:43 +0200 Subject: [PATCH 0266/1764] GPU: Fix parameter that was forgotten when moving to if constexpr with GPUCA_PAR_... 
--- GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h | 3 +++ GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 4ee6b23d46b51..57ad9907ca86f 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -574,6 +574,9 @@ #ifndef GPUCA_PAR_COMP_GATHER_MODE #define GPUCA_PAR_COMP_GATHER_MODE 0 #endif + #ifndef GPUCA_PAR_NO_ATOMIC_PRECHECK + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 0 + #endif #ifndef GPUCA_PAR_DEDX_STORAGE_TYPE #define GPUCA_PAR_DEDX_STORAGE_TYPE float #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 3b50bec45a41e..ddf01b586cd70 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -631,11 +631,11 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric for (uint32_t ih = hitFst; ih < hitLst; ih++) { int32_t id = idOffset + ids[ih]; GPUAtomic(uint32_t)* const weight = weights + id; -#if GPUCA_NO_ATOMIC_PRECHECK == 0 - if (myWeight <= *weight) { - continue; + if constexpr (GPUCA_PAR_NO_ATOMIC_PRECHECK == 0) { + if (myWeight <= *weight) { + continue; + } } -#endif const cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih); const float y = y0 + hh.x * stepY; const float z = z0 + hh.y * stepZ; From 8ffe16744647bf7792b3e04dfa9df8dbefcb8afd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 19 Apr 2025 12:28:36 +0200 Subject: [PATCH 0267/1764] GPU: Cleanup, remove obsolete code --- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 1 - .../Base/cuda/GPUReconstructionCUDA.cu | 18 ---------- .../Base/cuda/GPUReconstructionCUDA.h | 1 - .../Base/cuda/GPUReconstructionCUDAKernels.cu | 5 --- .../Base/opencl/GPUReconstructionOCL.cl | 2 -- 
GPU/GPUTracking/Definitions/GPUDef.h | 6 ---- .../Definitions/GPUDefParametersDefaults.h | 1 - .../Definitions/GPUDefParametersWrapper.h | 5 --- GPU/GPUTracking/Global/GPUChain.h | 1 - .../Global/GPUChainTrackingSectorTracker.cxx | 11 ------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 8 ++--- GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 33 ++++--------------- .../SectorTracker/GPUTPCTrackingData.h | 8 +---- .../GPUTPCTrackletConstructor.cxx | 18 +++++----- .../SectorTracker/GPUTPCTrackletConstructor.h | 3 -- 15 files changed, 19 insertions(+), 102 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 163b00c804d7f..dfd6176827484 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -112,7 +112,6 @@ class GPUReconstructionCPU : public GPUReconstructionKernels (size_t)deviceProp.maxTexture1DLinear) { - GPUError("Invalid maximum texture size of device: %ld < %ld\n", (int64_t)deviceProp.maxTexture1DLinear, (int64_t)(GPUCA_SECTOR_DATA_MEMORY * NSECTORS)); - return (1); - } -#endif #ifndef GPUCA_NO_CONSTANT_MEMORY if (gGPUConstantMemBufferSize > deviceProp.totalConstMem) { GPUError("Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceProp.totalConstMem, (int32_t)gGPUConstantMemBufferSize); @@ -627,18 +621,6 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) } #ifndef __HIPCC__ // CUDA -int32_t GPUReconstructionCUDA::PrepareTextures() -{ -#ifdef GPUCA_USE_TEXTURES - cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc(); - size_t offset; - GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); - cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc(); - GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * 
GPUCA_SECTOR_DATA_MEMORY)); -#endif - return (0); -} - void GPUReconstructionCUDA::startGPUProfiling() { GPUChkErr(cudaProfilerStart()); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index b1a3a53a6a62f..106168ef961a5 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -79,7 +79,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels gAliTexRefu2; -texture gAliTexRefu; -#endif - #include "GPUReconstructionIncludesDeviceAll.h" #if defined(__HIPCC__) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 10a425e4c76e8..ffdc34d6c9881 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -73,8 +73,6 @@ typedef signed char int8_t; #include "GPUConstantMem.h" #include "GPUReconstructionIncludesDeviceAll.h" -// if (gpu_mem != pTracker.GPUParametersConst()->gpumem) return; //TODO! - #define GPUCA_KRNL(...) 
GPUCA_KRNLGPU(__VA_ARGS__) #define GPUCA_CONSMEM_PTR GPUglobal() char *gpu_mem, GPUconstant() GPUConstantMem* pConstant, #define GPUCA_CONSMEM (*pConstant) diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index c77b9ce159306..69723813d384f 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ b/GPU/GPUTracking/Definitions/GPUDef.h @@ -58,12 +58,6 @@ #define CA_SHARED_CACHE_REF(target, src, size, reftype, ref) GPUglobalref() const reftype* __restrict__ ref = src #endif -#ifdef GPUCA_TEXTURE_FETCH_CONSTRUCTOR - #define CA_TEXTURE_FETCH(type, texture, address, entry) tex1Dfetch(texture, ((char*) address - tracker.Data().GPUTextureBase()) / sizeof(type) + entry); -#else - #define CA_TEXTURE_FETCH(type, texture, address, entry) address[entry]; -#endif - #endif //GPUTPCDEF_H #ifdef GPUCA_CADEBUG diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 57ad9907ca86f..19301ef2bef9f 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -267,7 +267,6 @@ #define GPUCA_PAR_COMP_GATHER_MODE 3 #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half - // #define GPUCA_USE_TEXTURES #elif defined(GPUCA_GPUTYPE_OPENCL) #else #error GPU TYPE NOT SET diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index b2c08d689aeb2..8a54ab2163eab 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -43,11 +43,6 @@ namespace o2::gpu // #define GPUCA_KERNEL_DEBUGGER_OUTPUT -// Derived parameters -#ifdef GPUCA_USE_TEXTURES - #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache -#endif - } // namespace o2::gpu // clang-format on diff --git a/GPU/GPUTracking/Global/GPUChain.h 
b/GPU/GPUTracking/Global/GPUChain.h index 290ae32cafca8..c4dccb091fc95 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -216,7 +216,6 @@ class GPUChain inline GPUChain* GetNextChainInQueue() { return mRec->GetNextChainInQueue(); } - virtual int32_t PrepareTextures() { return 0; } virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; } template diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 64a9179baf0e6..962b0922eeecc 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -103,17 +103,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() int32_t streamInitAndOccMap = mRec->NStreams() - 1; if (doGPU) { - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); - // Initialize Startup Constants - processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties().nThreads; - processorsShadow()->tpcTrackers[iSector].SetGPUTextureBase(mRec->DeviceMemoryBase()); - } - - if (PrepareTextures()) { - return (2); - } - // Copy Tracker Object to GPU Memory if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Copying Tracker objects to GPU"); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index ddf01b586cd70..5bd8fd556aa3f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -580,10 +580,8 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric } const GPUTPCTracker& GPUrestrict() tracker = *(Merger -> GetConstantMem()->tpcTrackers + sector); const GPUTPCRow& 
GPUrestrict() row = tracker.Row(iRow); -#ifndef GPUCA_TEXTURE_FETCH_CONSTRUCTOR GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); -#endif //! GPUCA_TEXTURE_FETCH_CONSTRUCTOR if (row.NHits() == 0) { return -1e6f; } @@ -626,8 +624,8 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric } for (int32_t k = 0; k <= nz; k++) { const int32_t mybin = bin + k * nBinsY; - const uint32_t hitFst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin); - const uint32_t hitLst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin + ny + 1); + const uint32_t hitFst = firsthit[mybin]; + const uint32_t hitLst = firsthit[mybin + ny + 1]; for (uint32_t ih = hitFst; ih < hitLst; ih++) { int32_t id = idOffset + ids[ih]; GPUAtomic(uint32_t)* const weight = weights + id; @@ -636,7 +634,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric continue; } } - const cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih); + const cahit2 hh = hits[ih]; const float y = y0 + hh.x * stepY; const float z = z0 + hh.y * stepZ; const float dy = y - uncorrectedY; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index e8aac872198f5..3bebdc4fa2b06 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -59,24 +59,14 @@ class GPUTPCTracker : public GPUProcessor void DumpTrackletHits(std::ostream& out); // Same for Track Hits #endif - struct StructGPUParameters { - GPUAtomic(uint32_t) nextStartHit; // Next Tracklet to process - }; - - struct StructGPUParametersConst { - GPUglobalref() char* gpumem; // Base pointer to GPU memory (Needed for OpenCL for verification) - }; - struct commonMemoryStruct { - commonMemoryStruct() : nStartHits(0), nTracklets(0), nRowHits(0), nTracks(0), nLocalTracks(0), nTrackHits(0), nLocalTrackHits(0), gpuParameters() {} - 
GPUAtomic(uint32_t) nStartHits; // number of start hits - GPUAtomic(uint32_t) nTracklets; // number of tracklets - GPUAtomic(uint32_t) nRowHits; // number of tracklet hits - GPUAtomic(uint32_t) nTracks; // number of reconstructed tracks - int32_t nLocalTracks; // number of reconstructed tracks before extrapolation tracking - GPUAtomic(uint32_t) nTrackHits; // number of track hits - int32_t nLocalTrackHits; // see above - StructGPUParameters gpuParameters; // GPU parameters + GPUAtomic(uint32_t) nStartHits = 0; // number of start hits + GPUAtomic(uint32_t) nTracklets = 0; // number of tracklets + GPUAtomic(uint32_t) nRowHits = 0; // number of tracklet hits + GPUAtomic(uint32_t) nTracks = 0; // number of reconstructed tracks + int32_t nLocalTracks = 0; // number of reconstructed tracks before extrapolation tracking + GPUAtomic(uint32_t) nTrackHits = 0; // number of track hits + int32_t nLocalTrackHits = 0; // see above }; GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const @@ -205,13 +195,6 @@ class GPUTPCTracker : public GPUProcessor GPUhd() GPUglobalref() GPUTPCRow* TrackingDataRows() const { return (mData.Rows()); } GPUhd() GPUglobalref() int32_t* RowStartHitCountOffset() const { return (mRowStartHitCountOffset); } - GPUhd() GPUglobalref() StructGPUParameters* GPUParameters() const { return (&mCommonMem->gpuParameters); } - GPUhd() StructGPUParametersConst* GPUParametersConst() - { - return (&mGPUParametersConst); - } - GPUhd() const StructGPUParametersConst* GetGPUParametersConst() const { return (&mGPUParametersConst); } - GPUhd() void SetGPUTextureBase(GPUglobalref() const void* val) { mData.SetGPUTextureBase(val); } struct trackSortData { int32_t fTtrack; // Track ID @@ -253,8 +236,6 @@ class GPUTPCTracker : public GPUProcessor GPUglobalref() GPUTPCHitId* mTrackletTmpStartHits = nullptr; // Unsorted start hits GPUglobalref() char* mGPUTrackletTemp = nullptr; // Temp Memory for GPU Tracklet Constructor - StructGPUParametersConst 
mGPUParametersConst; // Parameters for GPU if this is a GPU tracker - // event GPUglobalref() commonMemoryStruct* mCommonMem = nullptr; // common event memory GPUglobalref() GPUTPCHitId* mTrackletStartHits = nullptr; // start hits for the tracklets diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h index d7d5e76bc9d44..b08fbed4b319d 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h @@ -29,7 +29,7 @@ class GPUTPCHit; class GPUTPCTrackingData { public: - GPUTPCTrackingData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mGPUTextureBase(nullptr), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} + GPUTPCTrackingData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} #ifndef GPUCA_GPUCODE_DEVICE ~GPUTPCTrackingData() = default; @@ -112,10 +112,6 @@ class GPUTPCTrackingData GPUhdi() GPUglobalref() GPUAtomic(uint32_t) * HitWeights() { return (mHitWeights); } - GPUhdi() void SetGPUTextureBase(GPUglobalref() const void* val) { mGPUTextureBase = val; } - GPUhdi() char* GPUTextureBase() const { return ((char*)mGPUTextureBase); } - GPUhdi() char* GPUTextureBaseConst() const { return ((char*)mGPUTextureBase); } - GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const { return mClusterData; } private: @@ -135,8 +131,6 @@ class GPUTPCTrackingData int32_t mNumberOfHitsPlusAlign; int32_t mClusterIdOffset; - GPUglobalref() const void* mGPUTextureBase; // pointer to start of GPU texture - GPUglobalref() GPUTPCRow* mRows; // The row objects needed for most accessor functions GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 71df683eee1dc..2660f6d8cbf44 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -123,10 +123,10 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, break; // SG!!! - jump over the row } - cahit2 hh = CA_TEXTURE_FETCH(cahit22, gAliTexRefu2, tracker.HitData(row), r.mCurrIH); + cahit2 hh = tracker.HitData(row)[r.mCurrIH]; int32_t seedIH = r.mCurrIH; - r.mCurrIH = CA_TEXTURE_FETCH(calink, gAliTexRefs, tracker.HitLinkUpData(row), r.mCurrIH); + r.mCurrIH = tracker.HitLinkUpData(row)[r.mCurrIH]; float x = row.X(); float y = y0 + hh.x * stepY; @@ -282,10 +282,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, break; } -#ifndef GPUCA_TEXTURE_FETCH_CONSTRUCTOR GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); -#endif //! GPUCA_TEXTURE_FETCH_CONSTRUCTOR tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISector(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); if (tracker.Param().rec.tpc.rejectEdgeClustersInSeeding && tracker.Param().rejectEdgeClusterByY(yUncorrected, iRow, CAMath::Sqrt(tParam.Err2Y()))) { @@ -318,14 +316,14 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, #endif int32_t nBinsY = row.Grid().Ny(); int32_t mybin = bin + k * nBinsY; - uint32_t hitFst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin); - uint32_t hitLst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin + ny + 1); + uint32_t hitFst = firsthit[mybin]; + uint32_t hitLst = firsthit[mybin + ny + 1]; #ifdef __HIPCC__ // Todo: fixme! 
for (uint32_t ih = hitFst - 1; ++ih < hitLst; /*ih++*/) { #else for (uint32_t ih = hitFst; ih < hitLst; ih++) { #endif - cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih); + cahit2 hh = hits[ih]; float y = y0 + hh.x * stepY; float z = z0 + hh.y * stepZ; float dy = y - yUncorrected; @@ -353,7 +351,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } } - cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, best); + cahit2 hh = hits[best]; float y = y0 + hh.x * stepY + tParam.GetY() - yUncorrected; float z = z0 + hh.y * stepZ + tParam.GetZ() - zUncorrected; @@ -390,8 +388,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, const GPUglobalref() GPUTPCRow& GPUrestrict() row2 = tracker.Row(r.mLastRow); GPUglobalref() const cahit2* hits1 = tracker.HitData(row1); GPUglobalref() const cahit2* hits2 = tracker.HitData(row2); - const cahit2 hh1 = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits1, rowHits[r.mFirstRow]); - const cahit2 hh2 = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits2, rowHits[r.mLastRow]); + const cahit2 hh1 = hits1[rowHits[r.mFirstRow]]; + const cahit2 hh2 = hits2[rowHits[r.mLastRow]]; const float z1 = row1.Grid().ZMin() + hh1.y * row1.HstepZ(); const float z2 = row2.Grid().ZMin() + hh2.y * row2.HstepZ(); float oldOffset = tParam.ZOffset(); diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index af87d0276f1c7..031c32b2b4334 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -63,9 +63,6 @@ class GPUTPCTrackletConstructor : public GPUKernelTemplate struct GPUSharedMemory { CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); // rows - int32_t mNextStartHitFirst; // First start hit to be processed by CUDA block during next iteration - int32_t mNextStartHitCount; // Number of start hits to be processed by CUDA block 
during next iteration - int32_t mNextStartHitFirstRun; // First run for dynamic scheduler? int32_t mNStartHits; // Total number of start hits #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE From 497d53fdd0e4daaae9cf526a3c1988f8684728a9 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Sun, 20 Apr 2025 12:22:22 +0200 Subject: [PATCH 0268/1764] GPU stream implementation for ONNX runtime (#14117) * Initial set of bug.fixes and cosmetic changes * Please consider the following formatting changes * Adjusting eval sizes. Makes code neater and avoids some calculations * Adding separate functions. Now the host process only needs one instance and one initialization * First version of CCDB implementation * Working CCDB API calls (tested with test-ccdb) * Improve fetching, but have to pass settings by value, not const ref * Using const ref and moving CCDB calls to host initialization * Simplifications and renaming * Please consider the following formatting changes * First version of GPU stream implementation. Still needs testing. * Fixes * Please consider the following formatting changes * Adding the lane variable. This PR will in any case conflict with #14069 * Compiles on EPNs. Need to add shadow processors next. But for this, I will merge https://github.com/AliceO2Group/AliceO2/pull/14069 to have the changes in GPUChainTrackingClusterizer. * Adding shadow instance. Not sure if this correctly allocates GPU memory using AllocateRegisteredMemory * This runs, but will eventually fill up the VRAM. Need to include a mem clean * Found the stream allocation issue. Now starting optimizations * Improve readability and adapt for some comments * Fixing memory assignment issue. 
Reconstruction runs through with FP32 networks * Major reworkings to add FP16 support * Bug-fixes * Improved data filling speeds by factor 3 * Limiting threads for ONNX evaluation * Bug-fix for correct thread assignment and input data filling * Minor changes * Adding I** inference, potentially needed for CNN + FC inference * CCDB fetching of NNs ported to GPUWorkflowSpec * Adjusting CPU threads and ORT compile definitions * About 10x speed-up due to explicit io binding * Changes for synchronization and consistency. No performance loss. * Please consider the following formatting changes * Fixing warnings (errors due to size_t) * Fixing linker issues * Adding volatile memory allocation and MockedOrtAllocator. Removing print statements and time measurements * Please consider the following formatting changes * Circumvent "unused result" warning and build failure * Adjust for comments * Please consider the following formatting changes * Fixing build flags --------- Co-authored-by: ALICE Action Bot --- Common/ML/CMakeLists.txt | 21 +- Common/ML/include/ML/3rdparty/GPUORTFloat16.h | 2 +- Common/ML/include/ML/OrtInterface.h | 86 +++- Common/ML/src/OrtInterface.cxx | 410 +++++++++++++----- Detectors/TPC/calibration/CMakeLists.txt | 2 + .../TPCCalibration/NeuralNetworkClusterizer.h | 38 ++ .../src/NeuralNetworkClusterizer.cxx | 48 ++ GPU/GPUTracking/Base/GPUReconstructionCPU.h | 8 + .../Base/GPUReconstructionProcessing.h | 5 + GPU/GPUTracking/Base/cuda/CMakeLists.txt | 6 + .../Base/cuda/GPUReconstructionCUDA.cu | 42 ++ .../Base/cuda/GPUReconstructionCUDA.h | 6 + GPU/GPUTracking/Base/hip/CMakeLists.txt | 6 + GPU/GPUTracking/CMakeLists.txt | 1 + .../Definitions/GPUDefParametersDefaults.h | 1 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 14 +- GPU/GPUTracking/Global/GPUChain.h | 1 + .../Global/GPUChainTrackingClusterizer.cxx | 250 ++++++++--- .../TPCClusterFinder/GPUTPCNNClusterizer.cxx | 78 +++- .../TPCClusterFinder/GPUTPCNNClusterizer.h | 33 +-
.../GPUTPCNNClusterizerHost.cxx | 191 +++++++- .../GPUTPCNNClusterizerHost.h | 39 +- .../GPUTPCNNClusterizerKernels.cxx | 393 ++++++++++------- .../GPUTPCNNClusterizerKernels.h | 10 +- GPU/GPUTracking/kernels.cmake | 1 + .../include/GPUWorkflow/GPUWorkflowSpec.h | 3 + GPU/Workflow/src/GPUWorkflowSpec.cxx | 45 ++ 27 files changed, 1301 insertions(+), 439 deletions(-) create mode 100644 Detectors/TPC/calibration/include/TPCCalibration/NeuralNetworkClusterizer.h create mode 100644 Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx diff --git a/Common/ML/CMakeLists.txt b/Common/ML/CMakeLists.txt index 540fe8ebf271c..2db91fc4f4320 100644 --- a/Common/ML/CMakeLists.txt +++ b/Common/ML/CMakeLists.txt @@ -9,21 +9,14 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -# Pass ORT variables as a preprocessor definition -if(ORT_ROCM_BUILD) - add_compile_definitions(ORT_ROCM_BUILD=1) -endif() -if(ORT_CUDA_BUILD) - add_compile_definitions(ORT_CUDA_BUILD=1) -endif() -if(ORT_MIGRAPHX_BUILD) - add_compile_definitions(ORT_MIGRAPHX_BUILD=1) -endif() -if(ORT_TENSORRT_BUILD) - add_compile_definitions(ORT_TENSORRT_BUILD=1) -endif() - o2_add_library(ML SOURCES src/OrtInterface.cxx TARGETVARNAME targetName PRIVATE_LINK_LIBRARIES O2::Framework ONNXRuntime::ONNXRuntime) + +# Pass ORT variables as a preprocessor definition +target_compile_definitions(${targetName} PRIVATE + $<$:ORT_ROCM_BUILD> + $<$:ORT_CUDA_BUILD> + $<$:ORT_MIGRAPHX_BUILD> + $<$:ORT_TENSORRT_BUILD>) diff --git a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h index 76fd6734cf9db..9516ba5dad573 100644 --- a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h +++ b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h @@ -882,4 +882,4 @@ static_assert(sizeof(BFloat16_t) == sizeof(uint16_t), "Sizes must match"); } // namespace OrtDataType } // namespace o2 -#endif \ No newline at end of file +#endif diff --git 
a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index 93549178848ca..e37b6a69b6036 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -26,6 +26,13 @@ // O2 includes #include "Framework/Logger.h" +namespace Ort +{ +struct SessionOptions; +struct MemoryInfo; +struct Env; +} // namespace Ort + namespace o2 { @@ -36,14 +43,52 @@ class OrtModel { public: - // Constructor + // Constructors & destructors OrtModel() = default; - OrtModel(std::unordered_map optionsMap) { reset(optionsMap); } - void init(std::unordered_map optionsMap) { reset(optionsMap); } - void reset(std::unordered_map); + OrtModel(std::unordered_map optionsMap) { init(optionsMap); } + void init(std::unordered_map optionsMap) + { + initOptions(optionsMap); + initEnvironment(); + } + virtual ~OrtModel() = default; + + // General purpose + void initOptions(std::unordered_map optionsMap); + void initEnvironment(); + void initSession(); + void memoryOnDevice(int32_t = 0); bool isInitialized() { return mInitialized; } + void resetSession(); - virtual ~OrtModel() = default; + // Getters + std::vector> getNumInputNodes() const { return mInputShapes; } + std::vector> getNumOutputNodes() const { return mOutputShapes; } + std::vector getInputNames() const { return mInputNames; } + std::vector getOutputNames() const { return mOutputNames; } + Ort::SessionOptions* getSessionOptions(); + Ort::MemoryInfo* getMemoryInfo(); + Ort::Env* getEnv(); + int32_t getIntraOpNumThreads() const { return intraOpNumThreads; } + int32_t getInterOpNumThreads() const { return interOpNumThreads; } + + // Setters + void setDeviceId(int32_t id) { deviceId = id; } + void setIO(); + void setActiveThreads(int threads) { intraOpNumThreads = threads; } + void setIntraOpNumThreads(int threads) + { + if (deviceType == "CPU") { + intraOpNumThreads = threads; + } + } + void setInterOpNumThreads(int threads) + { + if (deviceType == "CPU") { + interOpNumThreads = threads; + } + } 
+ void setEnv(Ort::Env*); // Conversion template @@ -53,41 +98,36 @@ class OrtModel template // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h std::vector inference(std::vector&); - template // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h + template std::vector inference(std::vector>&); - template // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h - void inference(I*, size_t, O*); - - // template // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type - // std::vector inference(std::vector&); - - // Reset session - void resetSession(); + template + void inference(I*, int64_t, O*); - std::vector> getNumInputNodes() const { return mInputShapes; } - std::vector> getNumOutputNodes() const { return mOutputShapes; } - std::vector getInputNames() const { return mInputNames; } - std::vector getOutputNames() const { return mOutputNames; } + template + void inference(I**, int64_t, O*); - void setActiveThreads(int threads) { intraOpNumThreads = threads; } + void release(bool = false); private: - // ORT variables -> need to be hidden as Pimpl + // ORT variables -> need to be hidden as pImpl struct OrtVariables; OrtVariables* pImplOrt; // Input & Output specifications of the loaded network std::vector inputNamesChar, outputNamesChar; std::vector mInputNames, mOutputNames; - std::vector> mInputShapes, mOutputShapes; + std::vector> mInputShapes, mOutputShapes, inputShapesCopy, outputShapesCopy; // Input shapes + std::vector inputSizePerNode, outputSizePerNode; // Output shapes + int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs // Environment settings bool mInitialized = 
false; - std::string modelPath, device = "cpu", dtype = "float", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda - int intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; + std::string modelPath, envName = "", deviceType = "CPU", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda + int32_t intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = -1, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; std::string printShape(const std::vector&); + std::string printShape(const std::vector>&, std::vector&); }; } // namespace ml diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index 88f548bd4fe7b..24a2fbffb252c 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -33,11 +33,12 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c Ort::SessionOptions sessionOptions; Ort::AllocatorWithDefaultOptions allocator; Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault); + std::unique_ptr ioBinding = nullptr; }; -void OrtModel::reset(std::unordered_map optionsMap) +// General purpose +void OrtModel::initOptions(std::unordered_map optionsMap) { - pImplOrt = new OrtVariables(); // Load from options map @@ -47,77 +48,60 @@ void OrtModel::reset(std::unordered_map optionsMap) if (!optionsMap["model-path"].empty()) { modelPath = optionsMap["model-path"]; - device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU"); - dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float"); - deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0); + deviceType = (optionsMap.contains("device-type") ? optionsMap["device-type"] : "CPU"); + deviceId = (optionsMap.contains("device-id") ? 
std::stoi(optionsMap["device-id"]) : -1); allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0); loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0); enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0); - - std::string dev_mem_str = "Hip"; -#if defined(ORT_ROCM_BUILD) - if (device == "ROCM") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) ROCM execution provider set"; - } -#endif -#if defined(ORT_MIGRAPHX_BUILD) - if (device == "MIGRAPHX") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) MIGraphX execution provider set"; - } -#endif -#if defined(ORT_CUDA_BUILD) - if (device == "CUDA") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) CUDA execution provider set"; - dev_mem_str = "Cuda"; + envName = (optionsMap.contains("onnx-environment-name") ? 
optionsMap["onnx-environment-name"] : "onnx_model_inference"); + + if (deviceType == "CPU") { + (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads); + (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads); + if (intraOpNumThreads > 1 || interOpNumThreads > 1) { + (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); + } else if (intraOpNumThreads == 1) { + (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); + } + if (loggingLevel < 2) { + LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads"; + } } -#endif - if (allocateDeviceMemory) { - pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault); - LOG(info) << "(ORT) Memory info set to on-device memory"; - } + // OrtROCMProviderOptions rocm_options{}; + // (pImplOrt->sessionOptions).AppendExecutionProvider_ROCM(rocm_options); - if (device == "CPU") { - (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads); - (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads); - if (intraOpNumThreads > 1 || interOpNumThreads > 1) { - (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); - } else if (intraOpNumThreads == 1) { - (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); - } - if (loggingLevel < 2) { - LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads"; - } - } - - (pImplOrt->sessionOptions).DisableMemPattern(); - (pImplOrt->sessionOptions).DisableCpuMemArena(); + (pImplOrt->sessionOptions).DisableMemPattern(); + (pImplOrt->sessionOptions).DisableCpuMemArena(); - if (enableProfiling) { - if (optionsMap.contains("profiling-output-path")) { - 
(pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str()); + if (enableProfiling) { + if (optionsMap.contains("profiling-output-path")) { + (pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str()); + } else { + LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now."; + (pImplOrt->sessionOptions).DisableProfiling(); + } } else { - LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now."; (pImplOrt->sessionOptions).DisableProfiling(); } - } else { - (pImplOrt->sessionOptions).DisableProfiling(); - } - mInitialized = true; + (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations)); + (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel)); - (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations)); - (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel)); + mInitialized = true; + } else { + LOG(fatal) << "(ORT) Model path cannot be empty!"; + } +} +void OrtModel::initEnvironment() +{ pImplOrt->env = std::make_shared( OrtLoggingLevel(loggingLevel), - (optionsMap["onnx-environment-name"].empty() ? "onnx_model_inference" : optionsMap["onnx-environment-name"].c_str()), + (envName.empty() ? 
"ORT" : envName.c_str()), // Integrate ORT logging into Fairlogger [](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) { if (severity == ORT_LOGGING_LEVEL_VERBOSE) { @@ -136,31 +120,48 @@ void OrtModel::reset(std::unordered_map optionsMap) }, (void*)3); (pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events - pImplOrt->session = std::make_shared(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions); +} - for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { - mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get()); - } - for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { - mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); - } - for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { - mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get()); - } - for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { - mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); +void OrtModel::initSession() +{ + if (allocateDeviceMemory) { + memoryOnDevice(deviceId); } + pImplOrt->session = std::make_shared(*pImplOrt->env, modelPath.c_str(), pImplOrt->sessionOptions); + pImplOrt->ioBinding = std::make_unique(*pImplOrt->session); + + setIO(); - inputNamesChar.resize(mInputNames.size(), nullptr); - std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar), - [&](const std::string& str) { return str.c_str(); }); - outputNamesChar.resize(mOutputNames.size(), nullptr); - std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar), - [&](const std::string& str) { return str.c_str(); }); - } if (loggingLevel < 2) { - LOG(info) << "(ORT) Model loaded successfully! 
(input: " << printShape(mInputShapes[0]) << ", output: " << printShape(mOutputShapes[0]) << ")"; + LOG(info) << "(ORT) Model loaded successfully! (inputs: " << printShape(mInputShapes, mInputNames) << ", outputs: " << printShape(mOutputShapes, mInputNames) << ")"; + } +} + +void OrtModel::memoryOnDevice(int32_t deviceIndex) +{ +#if (defined(ORT_ROCM_BUILD) || defined(ORT_MIGRAPHX_BUILD) || defined(ORT_CUDA_BUILD) || defined(ORT_TENSORRT_BUILD)) + if (deviceIndex >= 0) { + (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); + (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h + (pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + (pImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + // Arena memory shrinkage comes at performance cost + /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; + // (pImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 + + 
std::string dev_mem_str = ""; + if (deviceType == "ROCM") { + dev_mem_str = "Hip"; + } + if (deviceType == "CUDA") { + dev_mem_str = "Cuda"; + } + pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault); + if (loggingLevel < 2) { + LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt; + } } +#endif } void OrtModel::resetSession() @@ -168,6 +169,22 @@ void OrtModel::resetSession() pImplOrt->session = std::make_shared(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions); } +// Getters +Ort::SessionOptions* OrtModel::getSessionOptions() +{ + return &pImplOrt->sessionOptions; +} + +Ort::MemoryInfo* OrtModel::getMemoryInfo() +{ + return &pImplOrt->memoryInfo; +} + +Ort::Env* OrtModel::getEnv() +{ + return (pImplOrt->env).get(); +} + template std::vector OrtModel::v2v(std::vector& input, bool clearInput) { @@ -183,20 +200,70 @@ std::vector OrtModel::v2v(std::vector& input, bool clearInput) } } -std::string OrtModel::printShape(const std::vector& v) +void OrtModel::setIO() { - std::stringstream ss(""); - for (size_t i = 0; i < v.size() - 1; i++) { - ss << v[i] << "x"; + for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { + mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get()); } - ss << v[v.size() - 1]; - return ss.str(); + for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { + mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); + } + for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { + mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get()); + } + for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { + 
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); + } + + inputNamesChar.resize(mInputNames.size(), nullptr); + std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar), + [&](const std::string& str) { return str.c_str(); }); + outputNamesChar.resize(mOutputNames.size(), nullptr); + std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar), + [&](const std::string& str) { return str.c_str(); }); + + inputShapesCopy = mInputShapes; + outputShapesCopy = mOutputShapes; + inputSizePerNode.resize(mInputShapes.size(), 1); + outputSizePerNode.resize(mOutputShapes.size(), 1); + mInputsTotal = 1; + for (size_t i = 0; i < mInputShapes.size(); ++i) { + if (mInputShapes[i].size() > 0) { + for (size_t j = 1; j < mInputShapes[i].size(); ++j) { + if (mInputShapes[i][j] > 0) { + mInputsTotal *= mInputShapes[i][j]; + inputSizePerNode[i] *= mInputShapes[i][j]; + } + } + } + } + mOutputsTotal = 1; + for (size_t i = 0; i < mOutputShapes.size(); ++i) { + if (mOutputShapes[i].size() > 0) { + for (size_t j = 1; j < mOutputShapes[i].size(); ++j) { + if (mOutputShapes[i][j] > 0) { + mOutputsTotal *= mOutputShapes[i][j]; + outputSizePerNode[i] *= mOutputShapes[i][j]; + } + } + } + } +} + +void OrtModel::setEnv(Ort::Env* env) +{ + pImplOrt->env = std::shared_ptr(env); } +// Inference template std::vector OrtModel::inference(std::vector& input) { - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; + std::vector inputShape = mInputShapes[0]; + inputShape[0] = input.size(); + for (size_t i = 1; i < mInputShapes[0].size(); ++i) { + inputShape[0] /= mInputShapes[0][i]; + } std::vector inputTensor; if constexpr (std::is_same_v) { inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); @@ -212,51 +279,182 @@ std::vector 
OrtModel::inference(std::vector& input) } template std::vector OrtModel::inference(std::vector&); - template std::vector OrtModel::inference(std::vector&); - template std::vector OrtModel::inference(std::vector&); template -void OrtModel::inference(I* input, size_t input_size, O* output) +void OrtModel::inference(I* input, int64_t input_size, O* output) { - std::vector inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; + // std::vector providers = Ort::GetAvailableProviders(); + // for (const auto& provider : providers) { + // LOG(info) << "Available Execution Provider: " << provider; + // } + std::vector inputShape{input_size, (int64_t)mInputShapes[0][1]}; Ort::Value inputTensor = Ort::Value(nullptr); if constexpr (std::is_same_v) { - inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input), input_size, inputShape.data(), inputShape.size()); + inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input), input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); } else { - inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, input, input_size, inputShape.data(), inputShape.size()); + inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, input, input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); } + (pImplOrt->ioBinding)->BindInput(mInputNames[0].c_str(), inputTensor); - std::vector outputShape{inputShape[0], mOutputShapes[0][1]}; - size_t outputSize = (int64_t)(input_size * mOutputShapes[0][1] / mInputShapes[0][1]); - Ort::Value outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, output, outputSize, outputShape.data(), outputShape.size()); + std::vector outputShape{input_size, mOutputShapes[0][1]}; + Ort::Value outputTensor = Ort::Value(nullptr); + if constexpr (std::is_same_v) { + outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(output), input_size * mOutputShapes[0][1], outputShape.data(), 
outputShape.size()); + } else { + outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); + } + (pImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); - (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, outputNamesChar.size()); // TODO: Not sure if 1 is correct here + (pImplOrt->session)->Run(pImplOrt->runOptions, *pImplOrt->ioBinding); } -template void OrtModel::inference(OrtDataType::Float16_t*, size_t, float*); +template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, OrtDataType::Float16_t*); +template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, float*); +template void OrtModel::inference(float*, int64_t, OrtDataType::Float16_t*); +template void OrtModel::inference(float*, int64_t, float*); + +template +void OrtModel::inference(I** input, int64_t input_size, O* output) +{ + std::vector inputTensors(inputShapesCopy.size()); + + for (size_t i = 0; i < inputShapesCopy.size(); ++i) { + + inputShapesCopy[i][0] = input_size; // batch-size + outputShapesCopy[i][0] = input_size; // batch-size + + if constexpr (std::is_same_v) { + inputTensors[i] = Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + reinterpret_cast(input[i]), + inputSizePerNode[i] * input_size, + inputShapesCopy[i].data(), + inputShapesCopy[i].size()); + } else { + inputTensors[i] = Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + input[i], + inputSizePerNode[i] * input_size, + inputShapesCopy[i].data(), + inputShapesCopy[i].size()); + } + } + + Ort::Value outputTensor = Ort::Value(nullptr); + if constexpr (std::is_same_v) { + outputTensor = Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + reinterpret_cast(output), + outputSizePerNode[0] * input_size, // assumes that there is only one output node + outputShapesCopy[0].data(), + outputShapesCopy[0].size()); + } else { + outputTensor = 
Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + output, + outputSizePerNode[0] * input_size, // assumes that there is only one output node + outputShapesCopy[0].data(), + outputShapesCopy[0].size()); + } + + // === Run inference === + pImplOrt->session->Run( + pImplOrt->runOptions, + inputNamesChar.data(), + inputTensors.data(), + inputNamesChar.size(), + outputNamesChar.data(), + &outputTensor, + outputNamesChar.size()); +} -template void OrtModel::inference(float*, size_t, float*); +template void OrtModel::inference(OrtDataType::Float16_t**, int64_t, OrtDataType::Float16_t*); +template void OrtModel::inference(OrtDataType::Float16_t**, int64_t, float*); +template void OrtModel::inference(float**, int64_t, OrtDataType::Float16_t*); +template void OrtModel::inference(float**, int64_t, float*); template -std::vector OrtModel::inference(std::vector>& input) +std::vector OrtModel::inference(std::vector>& inputs) { - std::vector inputTensor; - for (auto i : input) { - std::vector inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; + std::vector input_tensors; + + for (size_t i = 0; i < inputs.size(); ++i) { + + inputShapesCopy[i][0] = inputs[i].size() / inputSizePerNode[i]; // batch-size + if constexpr (std::is_same_v) { - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(i.data()), i.size(), inputShape.data(), inputShape.size())); + input_tensors.emplace_back( + Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + reinterpret_cast(inputs[i].data()), + inputSizePerNode[i] * inputShapesCopy[i][0], + inputShapesCopy[i].data(), + inputShapesCopy[i].size())); } else { - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, i.data(), i.size(), inputShape.data(), inputShape.size())); + input_tensors.emplace_back( + Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + inputs[i].data(), + inputSizePerNode[i] * inputShapesCopy[i][0], + inputShapesCopy[i].data(), + 
inputShapesCopy[i].size())); } } - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - O* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputTensor.size() / mInputShapes[0][1] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; + + int32_t totalOutputSize = mOutputsTotal * inputShapesCopy[0][0]; + + // === Run inference === + auto output_tensors = pImplOrt->session->Run( + pImplOrt->runOptions, + inputNamesChar.data(), + input_tensors.data(), + input_tensors.size(), + outputNamesChar.data(), + outputNamesChar.size()); + + // === Extract output values === + O* output_data = output_tensors[0].template GetTensorMutableData(); + std::vector output_vec(output_data, output_data + totalOutputSize); + output_tensors.clear(); + return output_vec; +} + +template std::vector OrtModel::inference(std::vector>&); +template std::vector OrtModel::inference(std::vector>&); + +// Release session +void OrtModel::release(bool profilingEnabled) +{ + // if (profilingEnabled) { + // pImplOrt->session->EndProfiling(); + // } + LOG(info) << "(ORT) Size of pImplOrt: " << sizeof(*pImplOrt) << " bytes"; +} + +// private +std::string OrtModel::printShape(const std::vector& v) +{ + std::stringstream ss(""); + for (size_t i = 0; i < v.size() - 1; i++) { + ss << v[i] << "x"; + } + ss << v[v.size() - 1]; + return ss.str(); +} + +std::string OrtModel::printShape(const std::vector>& v, std::vector& n) +{ + std::stringstream ss(""); + for (size_t i = 0; i < v.size(); i++) { + ss << n[i] << " -> ("; + for (size_t j = 0; j < v[i].size() - 1; j++) { + ss << v[i][j] << "x"; + } + ss << v[i][v[i].size() - 1] << "); "; + } + return ss.str(); } } // namespace ml diff --git a/Detectors/TPC/calibration/CMakeLists.txt 
b/Detectors/TPC/calibration/CMakeLists.txt index 0ec62e5f323b3..7722fc4e2884f 100644 --- a/Detectors/TPC/calibration/CMakeLists.txt +++ b/Detectors/TPC/calibration/CMakeLists.txt @@ -25,6 +25,7 @@ o2_add_library(TPCCalibration src/CalibPadGainTracksBase.cxx src/CalibLaserTracks.cxx src/LaserTracksCalibrator.cxx + src/NeuralNetworkClusterizer.cxx src/SACDecoder.cxx src/IDCAverageGroup.cxx src/IDCAverageGroupBase.cxx @@ -82,6 +83,7 @@ o2_target_root_dictionary(TPCCalibration include/TPCCalibration/FastHisto.h include/TPCCalibration/CalibLaserTracks.h include/TPCCalibration/LaserTracksCalibrator.h + include/TPCCalibration/NeuralNetworkClusterizer.h include/TPCCalibration/SACDecoder.h include/TPCCalibration/IDCAverageGroup.h include/TPCCalibration/IDCAverageGroupBase.h diff --git a/Detectors/TPC/calibration/include/TPCCalibration/NeuralNetworkClusterizer.h b/Detectors/TPC/calibration/include/TPCCalibration/NeuralNetworkClusterizer.h new file mode 100644 index 0000000000000..196bba644714c --- /dev/null +++ b/Detectors/TPC/calibration/include/TPCCalibration/NeuralNetworkClusterizer.h @@ -0,0 +1,38 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file NeuralNetworkClusterizer.h +/// \brief Fetching neural networks for clusterization from CCDB +/// \author Christian Sonnabend + +#ifndef AliceO2_TPC_NeuralNetworkClusterizer_h +#define AliceO2_TPC_NeuralNetworkClusterizer_h + +#include "CCDB/CcdbApi.h" + +namespace o2::tpc +{ + +class NeuralNetworkClusterizer +{ + public: + NeuralNetworkClusterizer() = default; + void initCcdbApi(std::string url); + void loadIndividualFromCCDB(std::map settings); + + private: + o2::ccdb::CcdbApi ccdbApi; + std::map metadata; + std::map headers; +}; + +} // namespace o2::tpc +#endif diff --git a/Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx b/Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx new file mode 100644 index 0000000000000..bfbb7afc946f8 --- /dev/null +++ b/Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx @@ -0,0 +1,48 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file NeuralNetworkClusterizer.cxx +/// \brief Fetching neural networks for clusterization from CCDB +/// \author Christian Sonnabend + +#include +#include "TPCCalibration/NeuralNetworkClusterizer.h" + +using namespace o2::tpc; + +void NeuralNetworkClusterizer::initCcdbApi(std::string url) +{ + ccdbApi.init(url); +} + +void NeuralNetworkClusterizer::loadIndividualFromCCDB(std::map settings) +{ + metadata["inputDType"] = settings["inputDType"]; + metadata["outputDType"] = settings["outputDType"]; + metadata["nnCCDBEvalType"] = settings["nnCCDBEvalType"]; // classification_1C, classification_2C, regression_1C, regression_2C + metadata["nnCCDBWithMomentum"] = settings["nnCCDBWithMomentum"]; // 0, 1 -> Only for regression model + metadata["nnCCDBLayerType"] = settings["nnCCDBLayerType"]; // FC, CNN + if (settings["nnCCDBInteractionRate"] != "" && std::stoi(settings["nnCCDBInteractionRate"]) > 0) { + metadata["nnCCDBInteractionRate"] = settings["nnCCDBInteractionRate"]; + } + if (settings["nnCCDBBeamType"] != "") { + metadata["nnCCDBBeamType"] = settings["nnCCDBBeamType"]; + } + + bool retrieveSuccess = ccdbApi.retrieveBlob(settings["nnCCDBPath"], settings["outputFolder"], metadata, 1, false, settings["outputFile"]); + // headers = ccdbApi.retrieveHeaders(settings["nnPathCCDB"], metadata, 1); // potentially needed to init some local variables + + if (retrieveSuccess) { + LOG(info) << "Network " << settings["nnCCDBPath"] << " retrieved from CCDB, stored at " << settings["outputFile"]; + } else { + LOG(error) << "Failed to retrieve network from CCDB"; + } +} diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index dfd6176827484..b37bf2b75f01c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -24,6 +24,11 @@ #include "GPUReconstructionKernelIncludes.h" #include "GPUReconstructionKernels.h" +namespace Ort +{ +struct SessionOptions; +} + namespace o2::gpu { 
@@ -108,6 +113,9 @@ class GPUReconstructionCPU : public GPUReconstructionKernels #include +namespace Ort +{ +struct SessionOptions; +} + namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 3655eaf66055e..f595fb051db54 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -114,6 +114,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingCUDA + PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -121,6 +122,11 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${CMAKE_CURRENT_SOURCE_DIR} TARGETVARNAME targetName) + target_compile_definitions(${targetName} PRIVATE + GPUCA_HAS_ONNX=1 + $<$:ORT_CUDA_BUILD> + $<$:ORT_TENSORRT_BUILD>) + install(FILES ${HDRS} DESTINATION include/GPU) endif() diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 3b5e257cc8000..fe2906caace80 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -37,6 +37,10 @@ #undef GPUCA_KRNL #endif +#ifdef GPUCA_HAS_ONNX +#include +#endif + static constexpr size_t REQUIRE_MIN_MEMORY = 1024L * 1024 * 1024; static constexpr size_t REQUIRE_MEMORY_RESERVED = 512L * 1024 * 1024; static constexpr size_t REQUIRE_FREE_MEMORY_RESERVED_PER_SM = 40L * 1024 * 1024; @@ -630,6 +634,28 @@ void GPUReconstructionCUDA::endGPUProfiling() { GPUChkErr(cudaProfilerStop()); } + +void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) +{ +#ifdef ORT_CUDA_BUILD + cudaGetDevice(deviceId); + OrtCUDAProviderOptionsV2* cuda_options = nullptr; + CreateCUDAProviderOptions(&cuda_options); + + // std::vector keys{"device_id", "gpu_mem_limit", 
"arena_extend_strategy", "cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace", "cudnn_conv1d_pad_to_nc1d"}; + // std::vector values{"0", "2147483648", "kSameAsRequested", "DEFAULT", "1", "1", "1"}; + // UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size()); + + // this implicitly sets "has_user_compute_stream" + cuda_options.has_user_compute_stream = 1; + UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream]); + session_options.AppendExecutionProvider_CUDA_V2(cuda_options); + + // Finally, don't forget to release the provider options + ReleaseCUDAProviderOptions(cuda_options); +#endif // ORT_CUDA_BUILD +} + #else // HIP void* GPUReconstructionHIP::getGPUPointer(void* ptr) { @@ -637,6 +663,22 @@ void* GPUReconstructionHIP::getGPUPointer(void* ptr) GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); return retVal; } + +void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) +{ +#ifdef ORT_ROCM_BUILD + // Create ROCm provider options + cudaGetDevice(deviceId); + // const auto& api = Ort::GetApi(); + // api.GetCurrentGpuDeviceId(deviceId); + OrtROCMProviderOptions rocm_options; + rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream + rocm_options.arena_extend_strategy = 0; // kNextPowerOfTwo = 0, kSameAsRequested = 1 -> https://github.com/search?q=repo%3Amicrosoft%2Fonnxruntime%20kSameAsRequested&type=code + // rocm_options.gpu_mem_limit = 1073741824; // 0 means no limit + rocm_options.user_compute_stream = mInternals->Streams[stream]; + session_options.AppendExecutionProvider_ROCM(rocm_options); +#endif // ORT_ROCM_BUILD +} #endif // __HIPCC__ namespace o2::gpu diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 106168ef961a5..2fc4d14bba491 100644 --- 
a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -25,6 +25,11 @@ extern "C" __declspec(dllexport) o2::gpu::GPUReconstruction* GPUReconstruction_C extern "C" o2::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const o2::gpu::GPUSettingsDeviceBackend& cfg); #endif +namespace Ort +{ +struct SessionOptions; +} + namespace o2::gpu { struct GPUReconstructionCUDAInternals; @@ -74,6 +79,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels* trackerTraits, std::unique_ptr* vertexerTraits, std::unique_ptr* timeFrame) override; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 3a03a054d4a7e..d7adb222d547b 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -162,6 +162,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingHIP + PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -169,6 +170,11 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${GPUCA_HIP_SOURCE_DIR} TARGETVARNAME targetName) + target_compile_definitions(${targetName} PRIVATE + GPUCA_HAS_ONNX=1 + $<$:ORT_ROCM_BUILD> + $<$:ORT_MIGRAPHX_BUILD>) + install(FILES ${HDRS} DESTINATION include/GPU) # o2_add_test(GPUsortHIP NAME test_GPUsortHIP diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index eaeec508ff27a..e82799b9e59c3 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -341,6 +341,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::DetectorsRaw O2::Steer O2::ML + PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime PUBLIC_INCLUDE_DIRECTORIES ${INCDIRS} SOURCES ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H}) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h 
index 19301ef2bef9f..a56fb97771fe5 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -492,6 +492,7 @@ #define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNNSingleElement GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 6858889f9a603..1106f96ed1cb2 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -248,7 +248,8 @@ AddOption(applyNNclusterizer, int, 0, "", 0, "(bool, default = 0), if the neural AddOption(nnInferenceDevice, std::string, "CPU", "", 0, "(std::string) Specify inference device (cpu (default), rocm, cuda)") AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify inference device id") AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0), if the device memory should be allocated for inference") -AddOption(nnInferenceDtype, std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16 +AddOption(nnInferenceInputDType, std::string, "FP32", "", 0, "(std::string) Specify the datatype for which inference is performed (FP32: default, fp16)") // fp32 or fp16 +AddOption(nnInferenceOutputDType, std::string, "FP32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // 
fp32 or fp16 AddOption(nnInferenceIntraOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetIntraOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.") AddOption(nnInferenceInterOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetInterOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.") AddOption(nnInferenceEnableOrtOptimization, unsigned int, 99, "", 0, "Enables graph optimizations in ONNX Runtime. Can be [0, 1, 2, 99] -> see https://github.com/microsoft/onnxruntime/blob/3f71d637a83dc3540753a8bb06740f67e926dc13/include/onnxruntime/core/session/onnxruntime_c_api.h#L347") @@ -269,6 +270,17 @@ AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The c AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.") AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path") AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).") +AddOption(nnEvalMode, std::string, "c1:r1", "", 0, "Concatention of modes, e.g. 
c1:r1 (classification class 1, regression class 1)") +// CCDB +AddOption(nnLoadFromCCDB, int, 1, "", 0, "If 1 networks are fetched from ccdb, else locally") +AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched") +AddOption(nnCCDBURL, std::string, "http://ccdb-test.cern.ch:8080", "", 0, "The CCDB URL from where the network files are fetched") +AddOption(nnCCDBPath, std::string, "Users/c/csonnabe/TPC/Clusterization", "", 0, "Folder path containing the networks") +AddOption(nnCCDBWithMomentum, int, 1, "", 0, "Distinguishes between the network with and without momentum output for the regression") +AddOption(nnCCDBClassificationLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN") +AddOption(nnCCDBRegressionLayerType, std::string, "CNN", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN") +AddOption(nnCCDBBeamType, std::string, "PbPb", "", 0, "Distinguishes between networks trained for different beam types. 
Options: PbPb, pp") +AddOption(nnCCDBInteractionRate, int, 50, "", 0, "Distinguishes between networks for different interaction rates [kHz].") AddHelp("help", 'h') EndConfig() diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index c4dccb091fc95..1e99e3b73736f 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -83,6 +83,7 @@ class GPUChain inline GPUParam& param() { return mRec->param(); } inline const GPUConstantMem* processors() const { return mRec->processors(); } inline void SynchronizeStream(int32_t stream) { mRec->SynchronizeStream(stream); } + inline void SetONNXGPUStream(Ort::SessionOptions& opt, int32_t stream, int32_t* deviceId) { mRec->SetONNXGPUStream(opt, stream, deviceId); } inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); } inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 4047dcae0a6b3..7db0ba66305e9 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -611,49 +611,89 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline) } + if (doGPU && mIOPtrs.tpcZS) { + processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta; + WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1); + } + if (doGPU) { + WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * 
NSECTORS, mRec->NStreams() - 1, &mEvents->init); + } + #ifdef GPUCA_HAS_ONNX + const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; + GPUTPCNNClusterizerHost nnApplications[GetProcessingSettings().nTPCClustererLanes]; + if (GetProcessingSettings().nn.applyNNclusterizer) { - uint32_t maxClusters = -1; - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - maxClusters = std::max(maxClusters, processors()->tpcClusterer[iSector].mNMaxClusters); - } - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector]; - const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; - clustererNN.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression; - clustererNN.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow; - clustererNN.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad; - clustererNN.nnClusterizerSizeInputTime = nn_settings.nnClusterizerSizeInputTime; - clustererNN.nnClusterizerAddIndexData = nn_settings.nnClusterizerAddIndexData; - clustererNN.nnClusterizerElementSize = ((2 * nn_settings.nnClusterizerSizeInputRow + 1) * (2 * nn_settings.nnClusterizerSizeInputPad + 1) * (2 * nn_settings.nnClusterizerSizeInputTime + 1)) + (nn_settings.nnClusterizerAddIndexData ? 
3 : 0); - clustererNN.nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode; - clustererNN.nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue; - clustererNN.nnClusterizerTotalClusters = maxClusters; - clustererNN.nnClassThreshold = nn_settings.nnClassThreshold; - clustererNN.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold; - if (clustererNN.nnSigmoidTrafoClassThreshold) { - clustererNN.nnClassThreshold = (float)std::log(clustererNN.nnClassThreshold / (1.f - clustererNN.nnClassThreshold)); + int32_t deviceId = -1; + int32_t numLanes = GetProcessingSettings().nTPCClustererLanes; + int32_t maxThreads = mRec->getNKernelHostThreads(true); + // bool recreateMemoryAllocator = false; + mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) { + nnApplications[lane].init(nn_settings); + if (nnApplications[lane].modelsUsed[0]) { + SetONNXGPUStream(*(nnApplications[lane].model_class).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].model_class).setDeviceId(deviceId); + if (nnApplications[lane].model_class.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].model_class.setIntraOpNumThreads(maxThreads); + } + (nnApplications[lane].model_class).initEnvironment(); + // Registering this once seems to be enough, even with different environmnents / models. ONNX apparently uses this per device and stores the OrtAllocator internally. All models will then use the volatile allocation. + // But environment must be valid, so we init the model environment first and use it here afterwards. + // Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator. 
+ // TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator); + // recreateMemoryAllocator = true; + (nnApplications[lane].model_class).initSession(); } - if (nn_settings.nnClusterizerVerbosity < 0) { - clustererNN.nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity; - } else { - clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity; + if (nnApplications[lane].modelsUsed[1]) { + SetONNXGPUStream(*(nnApplications[lane].model_reg_1).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].model_reg_1).setDeviceId(deviceId); + if (nnApplications[lane].model_reg_1.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].model_reg_1.setIntraOpNumThreads(maxThreads); + } + // (nnApplications[lane].model_reg_1).setEnv((nnApplications[lane].model_class).getEnv()); + (nnApplications[lane].model_reg_1).initEnvironment(); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_reg_1).getEnv(), (nnApplications[lane].model_reg_1).getMemoryInfo(), mRec, recreateMemoryAllocator); + (nnApplications[lane].model_reg_1).initSession(); + } + if (nnApplications[lane].modelsUsed[2]) { + SetONNXGPUStream(*(nnApplications[lane].model_reg_2).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].model_reg_2).setDeviceId(deviceId); + if (nnApplications[lane].model_reg_2.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].model_reg_2.setIntraOpNumThreads(maxThreads); + } + (nnApplications[lane].model_reg_2).initEnvironment(); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator); + (nnApplications[lane].model_reg_2).initSession(); + } + if (nn_settings.nnClusterizerVerbosity < 3) { 
+ LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId; + } + }); + mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t sector) { + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector]; + GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN; + int32_t lane = sector % numLanes; + clustererNN.deviceId = deviceId; + clustererNN.mISector = sector; + clustererNN.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; + nnApplications[lane].initClusterizer(nn_settings, clustererNN); + if (doGPU) { + clustererNNShadow.deviceId = deviceId; + clustererNNShadow.mISector = sector; + clustererNNShadow.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; + nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow); } - clustererNN.nnClusterizerDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos; - GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN); AllocateRegisteredMemory(clustererNN.mMemoryId); + }); + if (doGPU) { + WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); } + LOG(info) << "Size of nnApplications[lane]: " << sizeof(nnApplications[0]) << " bytes"; + LOG(info) << "Size of nnApplications: " << sizeof(GPUTPCNNClusterizerHost) * GetProcessingSettings().nTPCClustererLanes << " bytes"; } #endif - if (doGPU && mIOPtrs.tpcZS) { - processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta; - WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1); - } - if (doGPU) { - WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), 
processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); - } - size_t nClsTotal = 0; ClusterNativeAccess* tmpNativeAccess = mClusterNativeAccess.get(); ClusterNative* tmpNativeClusters = nullptr; @@ -914,58 +954,122 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (GetProcessingSettings().nn.applyNNclusterizer) { #ifdef GPUCA_HAS_ONNX - GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector]; - const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; - GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN); + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane]; + GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN; + GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane]; + + int withMC = (doGPU && propagateMCLabels); - if (clustererNN.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { + if (clustererNNShadow.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } - float time_clusterizer = 0, time_fill = 0; - for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNN.nnClusterizerBatchedMode); batch++) { - uint batchStart = batch * clustererNN.nnClusterizerBatchedMode; - size_t iSize = CAMath::Min((uint)clustererNN.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); + float time_clusterizer = 0, time_fill = 0, time_networks = 0; + for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.nnClusterizerBatchedMode); batch++) { + uint batchStart = batch * 
clustererNNShadow.nnClusterizerBatchedMode; + size_t iSize = CAMath::Min((uint)clustererNNShadow.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); auto start0 = std::chrono::high_resolution_clock::now(); - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Filling the data + runKernel({GetGrid(iSize * clustererNNShadow.nnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data - auto stop0 = std::chrono::high_resolution_clock::now(); - auto start1 = std::chrono::high_resolution_clock::now(); - nnApplication.networkInference(nnApplication.model_class, clustererNN, iSize, clustererNN.modelProbabilities, clustererNN.nnClusterizerDtype); + // auto stop0 = std::chrono::high_resolution_clock::now(); + // auto start1 = std::chrono::high_resolution_clock::now(); + + // NN evaluations + if (clustererNNShadow.nnInferenceInputDType == 0) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_32); + } + } else if (clustererNNShadow.nnInferenceInputDType == 1) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_32); + } + } + if (!clustererNNShadow.nnClusterizerUseCfRegression) { + if (clustererNNShadow.nnInferenceInputDType == 0) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + 
(nnApplication.model_reg_1).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg1_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg1_32); + } + } else if (clustererNNShadow.nnInferenceInputDType == 1) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg1_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg1_32); + } + } + if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) { + if (clustererNNShadow.nnInferenceInputDType == 0) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg2_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg2_32); + } + } else if (clustererNNShadow.nnInferenceInputDType == 1) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg2_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg2_32); + } + } + } + } + + // auto stopNNs = std::chrono::high_resolution_clock::now(); + + // Publishing kernels if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels + runKernel({GetGrid(iSize, lane), 
krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels } else { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels } - - if (!clustererNN.nnClusterizerUseCfRegression) { - nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNN.outputDataReg1, clustererNN.nnClusterizerDtype); - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 1 - if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.reg_model_paths.size() > 1) { - nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNN.outputDataReg2, clustererNN.nnClusterizerDtype); - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 2 + if (!clustererNNShadow.nnClusterizerUseCfRegression) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Publishing class 1 regression results + if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Publishing class 2 regression results } } - auto stop1 = std::chrono::high_resolution_clock::now(); - time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; - time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; + // for(int i = 0; i < iSize; ++i) { + // if(clustererNNShadow.outputDataClass[i + batchStart] > 1) { + // LOG(info) 
<< "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.modelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.outputDataClass[i + batchStart] << " thresh " << clustererNNShadow.nnClassThreshold << " instead of 0 or 1. Please check the model and the input data."; + // // std::string input = "["; + // // for(int j = 0; j < clustererNNShadow.nnClusterizerElementSize; j++){ + // // input += std::to_string(clustererNNShadow.inputData_16[i * clustererNNShadow.nnClusterizerElementSize + j].ToFloat()) + ", "; + // // } + // // input += "]"; + // // LOG(info) << "Input is: " << input; + // } + // } + + // auto stop1 = std::chrono::high_resolution_clock::now(); + + // time_networks += std::chrono::duration_cast(stopNNs - start1).count() / 1e9; + // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; + // time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; } - auto start1 = std::chrono::high_resolution_clock::now(); - if (clustererNN.nnClusterizerUseCfRegression) { - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 - } - auto stop1 = std::chrono::high_resolution_clock::now(); - time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; - if (clustererNN.nnClusterizerVerbosity < 3) { - int acceptedClusters = 0; - for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { - acceptedClusters += clustererNN.outputDataClass[i]; - } - LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. 
--> " << clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; + if (clustererNNShadow.nnClusterizerUseCfRegression) { + // auto start1 = std::chrono::high_resolution_clock::now(); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 + // auto stop1 = std::chrono::high_resolution_clock::now(); + // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; } + // if (clustererNNShadow.nnClusterizerVerbosity < 3) { + // int acceptedClusters = 0; + // for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { + // if(clustererNNShadow.outputDataClass[i] > 1 || clustererNNShadow.outputDataClass[i] < 0) { + // LOG(info) << "WARNING ORT 2: " << clustererNNShadow.outputDataClass[i] << " for index " << i << " / " << clusterer.mPmemory->counters.nClusters; + // } + // acceptedClusters += clustererNNShadow.outputDataClass[i]; + // } + // LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; networks: " << time_networks << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << (int32_t)clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; + // } #else GPUFatal("Project not compiled with neural network clusterization. 
Aborting."); #endif @@ -1066,6 +1170,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { + // if (GetProcessingSettings().nn.applyNNclusterizer) { + // GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; + // nnApplication.model_class.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.model_reg_1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.model_reg_2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // } if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx index 6a9b6f546ae07..fe3202fe7b439 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx @@ -24,29 +24,73 @@ void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {} void* GPUTPCNNClusterizer::setIOPointers(void* mem) { - if (nnClusterizerDtype == 0 && nnClusterizerElementSize > 0) { - computePointerWithAlignment(mem, inputData16, nnClusterizerBatchedMode * nnClusterizerElementSize); - } else if (nnClusterizerDtype == 1 && nnClusterizerElementSize > 0) { - computePointerWithAlignment(mem, inputData32, nnClusterizerBatchedMode * nnClusterizerElementSize); - } - computePointerWithAlignment(mem, peakPositions, nnClusterizerBatchedMode); - computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode); - computePointerWithAlignment(mem, centralCharges, nnClusterizerBatchedMode); - computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters); - if (nnClusterizerModelClassNumOutputNodes > 0) { - computePointerWithAlignment(mem, modelProbabilities, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); - } - if (!nnClusterizerUseCfRegression) { - if 
(nnClusterizerModelReg1NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg1, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + if (nnClusterizerBatchedMode > 0) { + if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, inputData_16, nnClusterizerBatchedMode * nnClusterizerElementSize); + } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, inputData_32, nnClusterizerBatchedMode * nnClusterizerElementSize); } - if (nnClusterizerModelReg2NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg2, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode); + + if (nnInferenceOutputDType == 0 && nnClusterizerElementSize > 0) { + if (nnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, modelProbabilities_16, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + } + if (!nnClusterizerUseCfRegression) { + if (nnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg1_16, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + } + if (nnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg2_16, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + } + } + } else if (nnInferenceOutputDType == 1 && nnClusterizerElementSize > 0) { + if (nnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, modelProbabilities_32, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + } + if (!nnClusterizerUseCfRegression) { + if (nnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg1_32, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + } + if (nnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, 
outputDataReg2_32, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + } + } } } + if (nnClusterizerTotalClusters > 0) { + computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters); + } return mem; } +// std::vector GPUTPCNNClusterizer::pointerSizes() { +// std::vector sizes(7, -1); +// if (nnClusterizerBatchedMode > 0) { +// if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) { +// sizes[0] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData16 +// } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) { +// sizes[1] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData32 +// } +// sizes[2] = 2 * nnClusterizerBatchedMode; // clusterFlags +// if (nnClusterizerModelClassNumOutputNodes > 0) { +// sizes[3] = nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes; // modelProbabilities +// } +// if (!nnClusterizerUseCfRegression) { +// if (nnClusterizerModelReg1NumOutputNodes > 0) { +// sizes[4] = nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes; // outputDataReg1 +// } +// if (nnClusterizerModelReg2NumOutputNodes > 0) { +// sizes[5] = nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes; // outputDataReg2 +// } +// } +// } +// if (nnClusterizerTotalClusters > 0) { +// sizes[6] = nnClusterizerTotalClusters; // outputDataClass +// } +// return sizes; +// } + void GPUTPCNNClusterizer::RegisterMemoryAllocation() { AllocateAndInitializeLate(); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h index ea6340dfd48bc..da490b0f94d58 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h @@ -42,33 +42,38 @@ class GPUTPCNNClusterizer : public GPUProcessor int nnClusterizerSizeInputTime = 3; int nnClusterizerElementSize = -1; bool nnClusterizerAddIndexData = true; - float nnClassThreshold = 0.16; + float 
nnClassThreshold = 0.01; bool nnSigmoidTrafoClassThreshold = 1; int nnClusterizerUseCfRegression = 0; int nnClusterizerBatchedMode = 1; int nnClusterizerTotalClusters = 1; int nnClusterizerVerbosity = 0; int nnClusterizerBoundaryFillValue = -1; - int nnClusterizerDumpDigits = 0; - int nnClusterizerApplyCfDeconvolution = 0; int nnClusterizerModelClassNumOutputNodes = -1; int nnClusterizerModelReg1NumOutputNodes = -1; int nnClusterizerModelReg2NumOutputNodes = -1; - int nnClusterizerDtype = 0; // 0: float16, 1: float32 + int nnInferenceInputDType = 0; // 0: float16, 1: float32 + int nnInferenceOutputDType = 0; // 0: float16, 1: float32 int mISector = -1; + int deviceId = -1; // Memory allocation for neural network - uint class2_elements = 0; - float* inputData32 = nullptr; - OrtDataType::Float16_t* inputData16 = nullptr; - float* outputDataClass = nullptr; - float* modelProbabilities = nullptr; - float* outputDataReg1 = nullptr; - float* outputDataReg2 = nullptr; - ChargePos* peakPositions = nullptr; - bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptrx - float* centralCharges = nullptr; + bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. 
Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptr + int* outputDataClass = nullptr; + + // FP32 + float* inputData_32 = nullptr; + float* modelProbabilities_32 = nullptr; + float* outputDataReg1_32 = nullptr; + float* outputDataReg2_32 = nullptr; + + // FP16 + OrtDataType::Float16_t* inputData_16 = nullptr; + OrtDataType::Float16_t* modelProbabilities_16 = nullptr; + OrtDataType::Float16_t* outputDataReg1_16 = nullptr; + OrtDataType::Float16_t* outputDataReg2_16 = nullptr; + int16_t mMemoryId = -1; }; // class GPUTPCNNClusterizer diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index 5002c63524020..db2f05711f537 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -12,54 +12,205 @@ /// \file GPUTPCNNClusterizerHost.cxx /// \author Christian Sonnabend +#include + #include "GPUTPCNNClusterizerHost.h" #include "GPUTPCNNClusterizer.h" #include "GPUSettings.h" #include "ML/3rdparty/GPUORTFloat16.h" +#include "GPUReconstruction.h" + +#ifdef GPUCA_HAS_ONNX +#include +#endif using namespace o2::gpu; -GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clusterer) +void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& settings) { + std::string class_model_path = settings.nnClassificationPath, reg_model_path = settings.nnRegressionPath; + std::vector reg_model_paths; + std::vector evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':'); + + if (settings.nnLoadFromCCDB) { + reg_model_path = settings.nnLocalFolder + "/net_regression_c1.onnx"; // Needs to be set identical to NeuralNetworkClusterizer.cxx, otherwise the networks might be loaded from the wrong place + if (evalMode[0] == "c1") { + class_model_path = settings.nnLocalFolder + "/net_classification_c1.onnx"; + } else 
if (evalMode[0] == "c2") { + class_model_path = settings.nnLocalFolder + "/net_classification_c2.onnx"; + } + + if (evalMode[1] == "r2") { + reg_model_path += ":" + settings.nnLocalFolder + "/net_regression_c2.onnx"; + } + } + OrtOptions = { - {"model-path", settings.nnClassificationPath}, - {"device", settings.nnInferenceDevice}, - {"device-id", std::to_string(settings.nnInferenceDeviceId)}, + {"model-path", class_model_path}, + {"device-type", settings.nnInferenceDevice}, {"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)}, - {"dtype", settings.nnInferenceDtype}, {"intra-op-num-threads", std::to_string(settings.nnInferenceIntraOpNumThreads)}, {"inter-op-num-threads", std::to_string(settings.nnInferenceInterOpNumThreads)}, {"enable-optimizations", std::to_string(settings.nnInferenceEnableOrtOptimization)}, {"enable-profiling", std::to_string(settings.nnInferenceOrtProfiling)}, {"profiling-output-path", settings.nnInferenceOrtProfilingPath}, - {"logging-level", std::to_string(settings.nnInferenceVerbosity)}}; + {"logging-level", std::to_string(settings.nnInferenceVerbosity)}, + {"onnx-environment-name", "c1"}}; - model_class.init(OrtOptions); - clusterer.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1]; + model_class.initOptions(OrtOptions); + modelsUsed[0] = true; - reg_model_paths = splitString(settings.nnRegressionPath, ":"); + reg_model_paths = o2::utils::Str::tokenize(reg_model_path, ':'); if (!settings.nnClusterizerUseCfRegression) { - if (model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) { + if (reg_model_paths.size() == 1) { OrtOptions["model-path"] = reg_model_paths[0]; - model_reg_1.init(OrtOptions); - clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + OrtOptions["onnx-environment-name"] = "r1"; + model_reg_1.initOptions(OrtOptions); + modelsUsed[1] = true; } else { OrtOptions["model-path"] = reg_model_paths[0]; - 
model_reg_1.init(OrtOptions); - clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + OrtOptions["onnx-environment-name"] = "r1"; + model_reg_1.initOptions(OrtOptions); + modelsUsed[1] = true; OrtOptions["model-path"] = reg_model_paths[1]; - model_reg_2.init(OrtOptions); - clusterer.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1]; + OrtOptions["onnx-environment-name"] = "r2"; + model_reg_2.initOptions(OrtOptions); + modelsUsed[2] = true; } } } -void GPUTPCNNClusterizerHost::networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype) +void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN) { - if (dtype == 0) { - model.inference(clusterer.inputData16, size * clusterer.nnClusterizerElementSize, output); + clustererNN.nnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression; + clustererNN.nnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow; + clustererNN.nnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad; + clustererNN.nnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime; + clustererNN.nnClusterizerAddIndexData = settings.nnClusterizerAddIndexData; + clustererNN.nnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 
3 : 0); + clustererNN.nnClusterizerBatchedMode = settings.nnClusterizerBatchedMode; + clustererNN.nnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue; + clustererNN.nnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold; + if (clustererNN.nnSigmoidTrafoClassThreshold) { + clustererNN.nnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold)); + } else { + clustererNN.nnClassThreshold = settings.nnClassThreshold; + } + if (settings.nnClusterizerVerbosity < 0) { + clustererNN.nnClusterizerVerbosity = settings.nnInferenceVerbosity; } else { - model.inference(clusterer.inputData32, size * clusterer.nnClusterizerElementSize, output); + clustererNN.nnClusterizerVerbosity = settings.nnClusterizerVerbosity; + } + clustererNN.nnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos; + clustererNN.nnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos; + clustererNN.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1]; + if (!settings.nnClusterizerUseCfRegression) { + if (model_class.getNumOutputNodes()[0][1] == 1 || !model_reg_2.isInitialized()) { + clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + } else { + clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + clustererNN.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1]; + } + } +} + +// MockedOrtAllocator implementation to be able to use volatile assignment +struct MockedOrtAllocator : OrtAllocator { + MockedOrtAllocator(GPUReconstruction* = nullptr, OrtMemoryInfo* = nullptr); + ~MockedOrtAllocator(); + + void* Alloc(size_t size); + void Free(void* p); + const OrtMemoryInfo* Info() const; + void* Reserve(size_t size); + size_t NumAllocations() const; + size_t NumReserveAllocations() const; + + void LeakCheck(); + + private: + MockedOrtAllocator(const 
MockedOrtAllocator&) = delete; + MockedOrtAllocator& operator=(const MockedOrtAllocator&) = delete; + + std::atomic memory_inuse{0}; + std::atomic num_allocations{0}; + std::atomic num_reserve_allocations{0}; + OrtMemoryInfo* memory_info; + GPUReconstruction* rec; +}; + +MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info) +{ + OrtAllocator::version = ORT_API_VERSION; + OrtAllocator::Alloc = [](OrtAllocator* this_, size_t size) { return static_cast(this_)->Alloc(size); }; + OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast(this_)->Free(p); }; + OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast(this_)->Info(); }; + OrtAllocator::Reserve = [](OrtAllocator* this_, size_t size) { return static_cast(this_)->Reserve(size); }; + rec = r; + memory_info = info; +} + +MockedOrtAllocator::~MockedOrtAllocator() +{ + // Ort::GetApi().ReleaseMemoryInfo(memory_info); +} + +void* MockedOrtAllocator::Alloc(size_t size) +{ + // LOG(info) << "(ORT) Allocating volatile memory of size " << size << " bytes"; + return rec->AllocateVolatileDeviceMemory(size); +} + +void* MockedOrtAllocator::Reserve(size_t size) +{ + // LOG(info) << "(ORT) Reserving volatile memory of size " << size << " bytes"; + return rec->AllocateVolatileDeviceMemory(size); +} + +void MockedOrtAllocator::Free(void* p) +{ + // LOG(info) << "(ORT) Freeing volatile memory " << p; + rec->ReturnVolatileDeviceMemory(); +} + +const OrtMemoryInfo* MockedOrtAllocator::Info() const +{ + return memory_info; +} + +size_t MockedOrtAllocator::NumAllocations() const +{ + return num_allocations.load(); +} + +size_t MockedOrtAllocator::NumReserveAllocations() const +{ + return num_reserve_allocations.load(); +} + +void MockedOrtAllocator::LeakCheck() +{ + if (memory_inuse.load()) + LOG(warning) << "memory leak!!!"; +} + +void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) +{ + mockedAlloc = 
std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); + if (recreate) { + Ort::ThrowOnError(Ort::GetApi().UnregisterAllocator((OrtEnv*)(*env), (OrtMemoryInfo*)(*memInfo))); } + Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc.get())); + memInfo = (Ort::MemoryInfo*)mockedAlloc->Info(); +} + +const OrtMemoryInfo* GPUTPCNNClusterizerHost::getMockedMemoryInfo() +{ + return mockedAlloc->Info(); +} + +MockedOrtAllocator* GPUTPCNNClusterizerHost::getMockedAllocator() +{ + return mockedAlloc.get(); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h index 7efa0edecb893..0379b83d0ae02 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -22,6 +22,15 @@ using namespace o2::ml; +class OrtMemoryInfo; +class OrtAllocator; +struct MockedOrtAllocator; +namespace Ort +{ +struct Env; +struct MemoryInfo; +} // namespace Ort + namespace o2::OrtDataType { struct Float16_t; @@ -30,6 +39,7 @@ struct Float16_t; namespace o2::gpu { +class GPUReconstruction; class GPUTPCNNClusterizer; struct GPUSettingsProcessingNNclusterizer; @@ -37,30 +47,23 @@ class GPUTPCNNClusterizerHost { public: GPUTPCNNClusterizerHost() = default; - GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); + GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings) { init(settings); } + + void init(const GPUSettingsProcessingNNclusterizer&); + void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); - void networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype); + // ONNX + void volatileOrtAllocator(Ort::Env*, Ort::MemoryInfo*, GPUReconstruction*, bool = false); + MockedOrtAllocator* getMockedAllocator(); + const OrtMemoryInfo* getMockedMemoryInfo(); std::unordered_map OrtOptions; 
o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters + std::vector modelsUsed = {false, false, false}; // 0: class, 1: reg_1, 2: reg_2 + int32_t deviceId = -1; std::vector reg_model_paths; - private: - // Avoid including CommonUtils/StringUtils.h - std::vector splitString(const std::string& input, const std::string& delimiter) - { - std::vector tokens; - std::size_t pos = 0; - std::size_t found; - - while ((found = input.find(delimiter, pos)) != std::string::npos) { - tokens.push_back(input.substr(pos, found - pos)); - pos = found + delimiter.length(); - } - tokens.push_back(input.substr(pos)); - - return tokens; - } + std::shared_ptr mockedAlloc = nullptr; }; // class GPUTPCNNClusterizerHost } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 379ea27443fea..2cf9ab2037007 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -35,7 +35,7 @@ using namespace o2::gpu::tpccf; // Defining individual thread functions for data filling, determining the class label and running the CF clusterizer template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) { uint glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; @@ -45,109 +45,26 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CPU_ONLY(MCLabelAccumulator labelAcc(clusterer)); - tpc::ClusterNative* 
clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + tpc::ClusterNative* clusterOut = (withMC) ? nullptr : clusterer.mPclusterByRow; o2::gpu::GPUTPCCFClusterizer::GPUSharedMemory smem_new; GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem_new, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow); } template <> GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - GPUTPCNNClusterizerKernels::fillInputData(nBlocks, nThreads, iBlock, iThread, processors, sector, dtype, batchStart); -} - -template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - uint glo_idx = get_global_id(0); - processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold); -} - -template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - auto& clusterer = processors.tpcNNClusterer[sector]; - uint glo_idx = get_global_id(0); - uint elem_iterator = glo_idx * clusterer.nnClusterizerModelClassNumOutputNodes; - float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, 
infty] - uint class_label = 0; - for (int pIdx = elem_iterator; pIdx < elem_iterator + clusterer.nnClusterizerModelClassNumOutputNodes; pIdx++) { - if (pIdx == elem_iterator) { - current_max_prob = clusterer.modelProbabilities[pIdx]; - } else { - class_label = (clusterer.modelProbabilities[pIdx] > current_max_prob ? pIdx : class_label); - } - } - // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clusterer.nnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins" - clusterer.outputDataClass[glo_idx + batchStart] = class_label; -} - -template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - uint glo_idx = get_global_id(0); - if (glo_idx >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { - return; - } - GPUTPCNNClusterizerKernels::publishClustersReg1(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart); -} - -template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - uint glo_idx = get_global_id(0); - if (glo_idx >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { - return; - } - GPUTPCNNClusterizerKernels::publishClustersReg2(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart); -} - -// THe following arithmetic is done because the network is trained with a split between IROC and OROC boundary -GPUd() int GPUTPCNNClusterizerKernels::padOffset(int row_ref, int row_current) -{ - return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2); -} - -GPUd() int 
GPUTPCNNClusterizerKernels::rowOffset(int row, int global_shift) -{ - return (row > 62 ? global_shift : 0); -} - -GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) -{ - if (pad < 0 || row < 0) { // Faster short-circuit - return true; - } else if (row < 63) { - return (pad >= static_cast(GPUTPCGeometry::NPads(row))); - } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network - return true; - } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) { - return (pad >= static_cast(GPUTPCGeometry::NPads(row - global_shift))); - } else { - return true; - } -} - -// Filling the input data for the neural network where there is no boundary -GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, processorType& processors, uint8_t sector, int8_t dtype, uint batchStart) { uint glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); - uint write_idx = glo_idx * clustererNN.nnClusterizerElementSize; // Potential optimization: Either choose nnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + Array2D isPeakMap(clusterer.mPpeakMap); ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); - - clustererNN.peakPositions[glo_idx] = peak; - clustererNN.centralCharges[glo_idx] = central_charge; - clustererNN.outputDataClass[glo_idx + batchStart] = -1; - int row_offset = 
GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); + #ifndef GPUCA_GPUCODE GPUCA_UNROLL(U(), U()); #endif @@ -160,20 +77,20 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n if (!is_boundary) { ChargePos tmp_pos(row + r, pad + p, time + t); if (r == 0 && !clustererNN.clusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization - clustererNN.clusterFlags[2 * glo_idx] = CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; } if (dtype == 0) { - clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); - } else { - clustererNN.inputData32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + } else if (dtype == 1) { + clustererNN.inputData_32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; } } else { // Filling boundary just to make sure that no values are left unintentionally if (dtype == 0) { - clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); } else { - clustererNN.inputData32[write_idx] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + clustererNN.inputData_32[write_idx] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); } } write_idx++; @@ -182,66 +99,191 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n } if (clustererNN.nnClusterizerAddIndexData) { if (dtype == 0) { - clustererNN.inputData16[write_idx] 
= (OrtDataType::Float16_t)(clusterer.mISector / 36.f); - clustererNN.inputData16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); - clustererNN.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); + clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f); + clustererNN.inputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); + clustererNN.inputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); + } else { + clustererNN.inputData_32[write_idx] = sector / 36.f; + clustererNN.inputData_32[write_idx + 1] = row / 152.f; + clustererNN.inputData_32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); + } + } +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + auto& clusterer = processors.tpcClusterer[sector]; + auto& clustererNN = processors.tpcNNClusterer[sector]; + uint base_idx = CAMath::Floor(glo_idx / clustererNN.nnClusterizerElementSize); + uint transient_index = glo_idx % clustererNN.nnClusterizerElementSize; + + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + Array2D isPeakMap(clusterer.mPpeakMap); + ChargePos peak = clusterer.mPfilteredPeakPositions[base_idx + batchStart]; + int row = static_cast(peak.row()), pad = static_cast(peak.pad()); + + if (clustererNN.nnClusterizerAddIndexData && transient_index == (clustererNN.nnClusterizerElementSize - 1)) { + uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize; + for (uint16_t i = 0; i < 8; i++) { + Delta2 d = cfconsts::InnerNeighbors[i]; + ChargePos tmp_pos = peak.delta(d); + clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.clusterFlags[2 * glo_idx + 1] = 
clustererNN.clusterFlags[2 * glo_idx]; + } + if (dtype == 0) { + clustererNN.inputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f); + clustererNN.inputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f); + clustererNN.inputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); } else { - clustererNN.inputData32[write_idx] = clusterer.mISector / 36.f; - clustererNN.inputData32[write_idx + 1] = row / 152.f; - clustererNN.inputData32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); + clustererNN.inputData_32[top_idx - 3] = sector / 36.f; + clustererNN.inputData_32[top_idx - 2] = row / 152.f; + clustererNN.inputData_32[top_idx - 1] = static_cast(pad) / GPUTPCGeometry::NPads(row); + } + } else if (transient_index < (clustererNN.nnClusterizerElementSize - 3)) { + int time = static_cast(peak.time()); + int r = CAMath::Floor(transient_index / ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1))) - clustererNN.nnClusterizerSizeInputRow; + bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); + if (is_row_boundary) { + if (dtype == 0) { + clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + } else { + clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + } + } else { + int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); + int pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r); + int rest_1 = transient_index % ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1)); + int p = CAMath::Floor(rest_1 / (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - 
clustererNN.nnClusterizerSizeInputPad + pad_offset; + bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow); + + if (!is_boundary) { + float central_charge = static_cast(chargeMap[peak].unpack()); + int t = (rest_1 % (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - clustererNN.nnClusterizerSizeInputTime; + ChargePos tmp_pos(row + r, pad + p, time + t); + if (dtype == 0) { + clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + } else if (dtype == 1) { + clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + } + } else { + if (dtype == 0) { + clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + } else { + clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + } + } } } } -GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + if (dtype == 0) { + processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)((processors.tpcNNClusterer[sector].modelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].nnClassThreshold); + } else if (dtype == 1) { + processors.tpcNNClusterer[sector].outputDataClass[glo_idx + 
batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities_32[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold); + } +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) { + auto& clustererNN = processors.tpcNNClusterer[sector]; + uint glo_idx = get_global_id(0); + uint elem_iterator = glo_idx * clustererNN.nnClusterizerModelClassNumOutputNodes; + float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty] + uint class_label = 0; + for (int pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.nnClusterizerModelClassNumOutputNodes; pIdx++) { + if (pIdx == elem_iterator) { + if (dtype == 0) { + current_max_prob = static_cast(clustererNN.modelProbabilities_16[pIdx]); + } else if (dtype == 1) { + current_max_prob = clustererNN.modelProbabilities_32[pIdx]; + } + } else { + if (dtype == 0) { + current_max_prob = CAMath::Max(current_max_prob, clustererNN.modelProbabilities_16[pIdx].ToFloat()); + } else if (dtype == 1) { + current_max_prob = CAMath::Max(current_max_prob, clustererNN.modelProbabilities_32[pIdx]); + } + } + } + // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clustererNN.nnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. 
The highest one "wins" + clustererNN.outputDataClass[glo_idx + batchStart] = class_label; + if (class_label > 1) { + clustererNN.clusterFlags[2 * glo_idx] = 1; + clustererNN.clusterFlags[2 * glo_idx + 1] = 1; + } +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + float central_charge = static_cast(chargeMap[peak].unpack()); + CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); - tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; uint full_glo_idx = glo_idx + batchStart; int model_output_index = glo_idx * clustererNN.nnClusterizerModelReg1NumOutputNodes; // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.nnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size(); - if (clustererNN.outputDataClass[full_glo_idx] == 1) { + if (clustererNN.outputDataClass[full_glo_idx] == 1 || (clustererNN.nnClusterizerModelReg2NumOutputNodes == -1 && clustererNN.outputDataClass[full_glo_idx] >= 1)) { ClusterAccumulator pc; // Publishing logic is taken from default clusterizer - if (onlyMC) { + if (withMC) { ClusterAccumulator dummy_pc; - CPU_ONLY(labelAcc->collect(clustererNN.peakPositions[glo_idx], chargeMap[clustererNN.peakPositions[glo_idx]].unpack())); + CPU_ONLY(labelAcc->collect(peak, central_charge)); GPUTPCCFClusterizer::buildCluster( clusterer.Param().rec, chargeMap, - clustererNN.peakPositions[glo_idx], + peak, smem.posBcast, smem.buf, smem.innerAboveThreshold, &dummy_pc, labelAcc); } - - if ((clusterer.mPmemory->fragment).isOverlap(clustererNN.peakPositions[glo_idx].time())) { + if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; } return; } - pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg1[model_output_index + 4], - static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg1[model_output_index], - clustererNN.outputDataReg1[model_output_index + 2], - (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg1[model_output_index + 1], - clustererNN.outputDataReg1[model_output_index + 3], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + if (dtype == 0) { + 
pc.setFull(central_charge * clustererNN.outputDataReg1_16[model_output_index + 4].ToFloat(), + static_cast(peak.pad()) + clustererNN.outputDataReg1_16[model_output_index].ToFloat(), + clustererNN.outputDataReg1_16[model_output_index + 2].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg1_16[model_output_index + 1].ToFloat(), + clustererNN.outputDataReg1_16[model_output_index + 3].ToFloat(), + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } else if (dtype == 1) { + pc.setFull(central_charge * clustererNN.outputDataReg1_32[model_output_index + 4], + static_cast(peak.pad()) + clustererNN.outputDataReg1_32[model_output_index], + clustererNN.outputDataReg1_32[model_output_index + 2], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg1_32[model_output_index + 1], + clustererNN.outputDataReg1_32[model_output_index + 3], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap); if (rejectCluster) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -250,11 +292,11 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha } uint rowIndex = 0; - if (clusterer.mPclusterByRow != nullptr) { + if (clusterOut != nullptr) { rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( clusterer, myCluster, - clustererNN.peakPositions[glo_idx].row(), + peak.row(), clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut); @@ -264,7 +306,7 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha } else if 
(clusterer.mPclusterPosInRow) { rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; } - CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); + CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow)); } else { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -273,38 +315,41 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha } } -GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) { + uint glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + float central_charge = static_cast(chargeMap[peak].unpack()); + CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); - tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; uint full_glo_idx = glo_idx + batchStart; int model_output_index = glo_idx * clustererNN.nnClusterizerModelReg2NumOutputNodes; - // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.nnClusterizerModelReg2NumOutputNodes << " -- " << clustererNN.peakPositions.size() << " -- " << clustererNN.centralCharges.size(); - if (clustererNN.outputDataClass[full_glo_idx] > 0) { ClusterAccumulator pc; - if (onlyMC) { + if (withMC) { ClusterAccumulator dummy_pc; - CPU_ONLY(labelAcc->collect(clustererNN.peakPositions[glo_idx], chargeMap[clustererNN.peakPositions[glo_idx]].unpack())); + CPU_ONLY(labelAcc->collect(peak, central_charge)); GPUTPCCFClusterizer::buildCluster( clusterer.Param().rec, chargeMap, - clustererNN.peakPositions[glo_idx], + peak, smem.posBcast, smem.buf, smem.innerAboveThreshold, &dummy_pc, labelAcc); } - - if ((clusterer.mPmemory->fragment).isOverlap(clustererNN.peakPositions[glo_idx].time())) { + if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; } @@ -312,16 +357,26 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha } // Cluster 1 - pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 8], - static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index], - clustererNN.outputDataReg2[model_output_index + 4], - (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 2], - clustererNN.outputDataReg2[model_output_index + 6], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + if (dtype == 0) { + pc.setFull(central_charge * clustererNN.outputDataReg2_16[model_output_index + 8].ToFloat(), + 
static_cast(peak.pad()) + clustererNN.outputDataReg2_16[model_output_index].ToFloat(), + clustererNN.outputDataReg2_16[model_output_index + 4].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_16[model_output_index + 2].ToFloat(), + clustererNN.outputDataReg2_16[model_output_index + 6].ToFloat(), + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } else if (dtype == 1) { + pc.setFull(central_charge * clustererNN.outputDataReg2_32[model_output_index + 8], + static_cast(peak.pad()) + clustererNN.outputDataReg2_32[model_output_index], + clustererNN.outputDataReg2_32[model_output_index + 4], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_32[model_output_index + 2], + clustererNN.outputDataReg2_32[model_output_index + 6], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap); if (rejectCluster) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -330,11 +385,11 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha } uint rowIndex = 0; - if (clusterer.mPclusterByRow != nullptr) { + if (clusterOut != nullptr) { rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( clusterer, myCluster, - clustererNN.peakPositions[glo_idx].row(), + peak.row(), clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut); @@ -344,18 +399,28 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha } else if (clusterer.mPclusterPosInRow) { rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; } - 
CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); + CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow)); // Cluster 2 - pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 9], - static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index + 1], - clustererNN.outputDataReg2[model_output_index + 5], - (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 3], - clustererNN.outputDataReg2[model_output_index + 7], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); - - rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + if (dtype == 0) { + pc.setFull(central_charge * clustererNN.outputDataReg2_16[model_output_index + 9].ToFloat(), + static_cast(peak.pad()) + clustererNN.outputDataReg2_16[model_output_index + 1].ToFloat(), + clustererNN.outputDataReg2_16[model_output_index + 5].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_16[model_output_index + 3].ToFloat(), + clustererNN.outputDataReg2_16[model_output_index + 7].ToFloat(), + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } else if (dtype == 1) { + pc.setFull(central_charge * clustererNN.outputDataReg2_32[model_output_index + 9], + static_cast(peak.pad()) + clustererNN.outputDataReg2_32[model_output_index + 1], + clustererNN.outputDataReg2_32[model_output_index + 5], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_32[model_output_index + 3], + clustererNN.outputDataReg2_32[model_output_index + 7], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } + + 
rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap); if (rejectCluster) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -363,11 +428,11 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha return; } - if (clusterer.mPclusterByRow != nullptr) { + if (clusterOut != nullptr) { rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( clusterer, myCluster, - clustererNN.peakPositions[glo_idx].row(), + peak.row(), clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut); @@ -377,7 +442,7 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha } else if (clusterer.mPclusterPosInRow) { rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; } - // CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); // -> Is this needed? How to handle MC labels for split clusters? + // CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow)); // -> Is this needed? How to handle MC labels for split clusters? } else { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -385,3 +450,29 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha return; } } + +// The following arithmetic is done because the network is trained with a split between IROC and OROC boundary +GPUd() int GPUTPCNNClusterizerKernels::padOffset(int row_ref, int row_current) +{ + return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2); +} + +GPUd() int GPUTPCNNClusterizerKernels::rowOffset(int row, int global_shift) +{ + return (row > 62 ? 
global_shift : 0); +} + +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) +{ + if (pad < 0 || row < 0) { // Faster short-circuit + return true; + } else if (row < 63) { + return (pad >= static_cast(GPUTPCGeometry::NPads(row))); + } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network + return true; + } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) { + return (pad >= static_cast(GPUTPCGeometry::NPads(row - global_shift))); + } else { + return true; + } +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index a1d641fdb0b93..27cfba2487aed 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -39,6 +39,7 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate public: // Must all have same number of threads, since they use a common SCRATCH_PAD_WORK_GROUP_SIZE below static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNNSingleElement) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression) == 
GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); @@ -59,10 +60,11 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate enum K : int32_t { runCfClusterizer = 0, fillInputNN = 1, - determineClass1Labels = 2, - determineClass2Labels = 3, - publishClass1Regression = 4, - publishClass2Regression = 5, + fillInputNNSingleElement = 2, + determineClass1Labels = 3, + determineClass2Labels = 4, + publishClass1Regression = 5, + publishClass2Regression = 6, }; template diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index fcf576d828b7f..7e3ddf868af2a 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -116,6 +116,7 @@ o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUS if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNNSingleElement" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass1Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) diff --git a/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h b/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h index 0038233f1c376..73f1f208e8889 100644 --- a/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h +++ 
b/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h @@ -83,6 +83,7 @@ class GPUO2Interface; struct TPCPadGainCalib; struct TPCZSLinkMapping; struct GPUSettingsO2; +struct GPUSettingsProcessingNNclusterizer; class GPUO2InterfaceQA; struct GPUTrackingInOutPointers; struct GPUTrackingInOutZS; @@ -225,6 +226,8 @@ class GPURecoWorkflowSpec : public o2::framework::Task uint32_t mNextThreadIndex = 0; bool mUpdateGainMapCCDB = true; std::unique_ptr mTFSettings; + std::unique_ptr mNNClusterizerSettings; + Config mSpecConfig; std::shared_ptr mGGR; bool mGRPGeomUpdated = false; diff --git a/GPU/Workflow/src/GPUWorkflowSpec.cxx b/GPU/Workflow/src/GPUWorkflowSpec.cxx index 7ad03ec58ae80..8a755a703705f 100644 --- a/GPU/Workflow/src/GPUWorkflowSpec.cxx +++ b/GPU/Workflow/src/GPUWorkflowSpec.cxx @@ -78,6 +78,7 @@ #include "DetectorsRaw/RDHUtils.h" #include "ITStracking/TrackingInterface.h" #include "GPUWorkflowInternal.h" +#include "TPCCalibration/NeuralNetworkClusterizer.h" // #include "Framework/ThreadPool.h" #include @@ -132,6 +133,50 @@ void GPURecoWorkflowSpec::init(InitContext& ic) { GRPGeomHelper::instance().setRequest(mGGR); GPUO2InterfaceConfiguration& config = *mConfig.get(); + GPUSettingsProcessingNNclusterizer& mNNClusterizerSettings = mConfig->configProcessing.nn; + + if (mNNClusterizerSettings.nnLoadFromCCDB) { + LOG(info) << "Loading neural networks from CCDB"; + o2::tpc::NeuralNetworkClusterizer nnClusterizerFetcher; + nnClusterizerFetcher.initCcdbApi(mNNClusterizerSettings.nnCCDBURL); + std::map ccdbSettings = { + {"nnCCDBURL", mNNClusterizerSettings.nnCCDBURL}, + {"nnCCDBPath", mNNClusterizerSettings.nnCCDBPath}, + {"inputDType", mNNClusterizerSettings.nnInferenceInputDType}, + {"outputDType", mNNClusterizerSettings.nnInferenceOutputDType}, + {"outputFolder", mNNClusterizerSettings.nnLocalFolder}, + {"nnCCDBPath", mNNClusterizerSettings.nnCCDBPath}, + {"nnCCDBWithMomentum", std::to_string(mNNClusterizerSettings.nnCCDBWithMomentum)}, + {"nnCCDBBeamType", 
mNNClusterizerSettings.nnCCDBBeamType}, + {"nnCCDBInteractionRate", std::to_string(mNNClusterizerSettings.nnCCDBInteractionRate)}}; + + std::string nnFetchFolder = mNNClusterizerSettings.nnLocalFolder; + std::vector evalMode = o2::utils::Str::tokenize(mNNClusterizerSettings.nnEvalMode, ':'); + + if (evalMode[0] == "c1") { + ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBClassificationLayerType; + ccdbSettings["nnCCDBEvalType"] = "classification_c1"; + ccdbSettings["outputFile"] = "net_classification_c1.onnx"; + nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings); + } else if (evalMode[0] == "c2") { + ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBClassificationLayerType; + ccdbSettings["nnCCDBEvalType"] = "classification_c2"; + ccdbSettings["outputFile"] = "net_classification_c2.onnx"; + nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings); + } + + ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBRegressionLayerType; + ccdbSettings["nnCCDBEvalType"] = "regression_c1"; + ccdbSettings["outputFile"] = "net_regression_c1.onnx"; + nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings); + if (evalMode[1] == "r2") { + ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBRegressionLayerType; + ccdbSettings["nnCCDBEvalType"] = "regression_c2"; + ccdbSettings["outputFile"] = "net_regression_c2.onnx"; + nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings); + } + LOG(info) << "Neural network loading done!"; + } // Create configuration object and fill settings mConfig->configGRP.solenoidBzNominalGPU = 0; From 760831f9229c18525c9442ae053b8b348afb2cf3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 19 Apr 2025 15:24:00 +0200 Subject: [PATCH 0269/1764] GPU: Simplify kernel Call interface, remove intermediate classes and headers where possible --- .../workflow/src/TRDGlobalTrackingSpec.cxx | 2 + GPU/GPUTracking/Base/GPUProcessor.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 257 
++++++++++-------- GPU/GPUTracking/Base/GPUReconstruction.h | 107 +++----- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 49 ++-- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 111 +------- .../Base/GPUReconstructionCPUKernels.h | 98 +++++++ .../Base/GPUReconstructionConvert.cxx | 2 +- .../Base/GPUReconstructionDeviceBase.cxx | 21 +- .../Base/GPUReconstructionDeviceBase.h | 8 +- GPU/GPUTracking/Base/GPUReconstructionIO.h | 40 ++- .../Base/GPUReconstructionKernelMacros.h | 2 +- .../Base/GPUReconstructionKernels.h | 115 -------- .../Base/GPUReconstructionLibrary.cxx | 1 + .../Base/GPUReconstructionProcessing.cxx | 19 +- .../Base/GPUReconstructionProcessing.h | 98 ++++++- .../GPUReconstructionProcessingKernels.inc | 41 +++ .../Base/GPUReconstructionTimeframe.cxx | 2 + .../Base/cuda/GPUReconstructionCUDA.cu | 101 ++++--- .../Base/cuda/GPUReconstructionCUDA.h | 24 +- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 62 ++--- .../cuda/GPUReconstructionCUDAInternals.h | 8 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 14 +- .../Base/opencl/GPUReconstructionOCL.cxx | 109 ++++---- .../Base/opencl/GPUReconstructionOCL.h | 13 +- .../opencl/GPUReconstructionOCLIncludesHost.h | 6 +- .../opencl/GPUReconstructionOCLKernels.cxx | 19 +- GPU/GPUTracking/CMakeLists.txt | 8 +- .../DataCompression/GPUTPCCompression.cxx | 3 +- .../DataTypes/GPUKernelClassesFwd.h | 40 +++ GPU/GPUTracking/Global/GPUChain.h | 46 +++- GPU/GPUTracking/Global/GPUChainITS.cxx | 1 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 4 +- GPU/GPUTracking/Global/GPUChainTracking.h | 41 +-- .../Global/GPUChainTrackingClusterizer.cxx | 37 ++- .../Global/GPUChainTrackingCompression.cxx | 3 + .../GPUChainTrackingDebugAndProfiling.cxx | 6 +- GPU/GPUTracking/Global/GPUChainTrackingDefs.h | 2 +- .../Global/GPUChainTrackingGetters.inc | 36 +++ GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 1 + .../Global/GPUChainTrackingMerger.cxx | 5 + .../Global/GPUChainTrackingRefit.cxx | 4 + .../Global/GPUChainTrackingSectorTracker.cxx 
| 10 + .../Global/GPUChainTrackingTRD.cxx | 10 +- .../Global/GPUChainTrackingTransformation.cxx | 4 + .../Global/GPUTrackingInputProvider.cxx | 3 + GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 + GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 +- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 1 + GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- .../SectorTracker/GPUTPCTrackerDump.cxx | 1 + .../Standalone/Benchmark/standalone.cxx | 25 +- GPU/GPUTracking/Standalone/tools/createGeo.C | 1 + GPU/GPUTracking/Standalone/tools/createLUT.C | 1 + .../{Array2D.h => CfArray2D.h} | 18 +- .../{ChargePos.h => CfChargePos.h} | 14 +- GPU/GPUTracking/TPCClusterFinder/CfUtils.h | 18 +- .../TPCClusterFinder/ClusterAccumulator.cxx | 4 +- .../TPCClusterFinder/ClusterAccumulator.h | 8 +- .../GPUTPCCFChargeMapFiller.cxx | 16 +- .../GPUTPCCFChargeMapFiller.h | 8 +- .../GPUTPCCFCheckPadBaseline.cxx | 14 +- .../GPUTPCCFCheckPadBaseline.h | 2 +- .../TPCClusterFinder/GPUTPCCFClusterizer.cxx | 2 +- .../TPCClusterFinder/GPUTPCCFClusterizer.h | 12 +- .../TPCClusterFinder/GPUTPCCFClusterizer.inc | 16 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 14 +- .../GPUTPCCFDeconvolution.cxx | 14 +- .../TPCClusterFinder/GPUTPCCFDeconvolution.h | 6 +- .../GPUTPCCFNoiseSuppression.cxx | 32 +-- .../GPUTPCCFNoiseSuppression.h | 12 +- .../TPCClusterFinder/GPUTPCCFPeakFinder.cxx | 20 +- .../TPCClusterFinder/GPUTPCCFPeakFinder.h | 10 +- .../GPUTPCCFStreamCompaction.cxx | 4 +- .../TPCClusterFinder/GPUTPCClusterFinder.cxx | 6 +- .../TPCClusterFinder/GPUTPCClusterFinder.h | 8 +- .../GPUTPCClusterFinderDump.cxx | 8 +- .../TPCClusterFinder/GPUTPCNNClusterizer.cxx | 1 + .../TPCClusterFinder/GPUTPCNNClusterizer.h | 2 +- .../GPUTPCNNClusterizerKernels.cxx | 28 +- .../GPUTPCNNClusterizerKernels.h | 4 +- .../TPCClusterFinder/MCLabelAccumulator.cxx | 2 +- .../TPCClusterFinder/MCLabelAccumulator.h | 6 +- GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx | 1 + GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 1 + 
GPU/GPUTracking/display/GPUDisplay.cxx | 3 +- GPU/GPUTracking/display/GPUDisplay.h | 16 +- .../display/backend/GPUDisplayBackend.cxx | 1 + .../backend/GPUDisplayBackendOpenGL.cxx | 2 + .../backend/GPUDisplayBackendVulkan.cxx | 2 + .../display/frontend/GPUDisplayFrontend.cxx | 3 + .../frontend/GPUDisplayFrontendWayland.cxx | 1 + .../display/frontend/GPUDisplayInfo.inc | 36 +++ .../display/frontend/GPUDisplayKeys.cxx | 1 + .../display/helpers/GPUDisplayAnimation.cxx | 2 + .../GPUDisplayBackendOpenGLMagneticField.cxx | 1 + .../display/helpers/GPUDisplayHelpers.cxx | 3 + .../helpers/GPUDisplayInterpolation.cxx | 1 + .../display/helpers/GPUDisplayLoader.cxx | 1 + .../display/helpers/GPUDisplayROOT.cxx | 3 + .../display/render/GPUDisplayDraw.cxx | 3 +- .../display/render/GPUDisplayImportEvent.cxx | 1 + GPU/GPUTracking/kernels.cmake | 2 +- GPU/GPUTracking/qa/GPUQA.cxx | 3 +- GPU/GPUTracking/qa/genEvents.cxx | 3 +- 105 files changed, 1136 insertions(+), 899 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h delete mode 100644 GPU/GPUTracking/Base/GPUReconstructionKernels.h create mode 100644 GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc create mode 100644 GPU/GPUTracking/DataTypes/GPUKernelClassesFwd.h create mode 100644 GPU/GPUTracking/Global/GPUChainTrackingGetters.inc rename GPU/GPUTracking/TPCClusterFinder/{Array2D.h => CfArray2D.h} (81%) rename GPU/GPUTracking/TPCClusterFinder/{ChargePos.h => CfChargePos.h} (80%) create mode 100644 GPU/GPUTracking/display/frontend/GPUDisplayInfo.inc diff --git a/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx b/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx index 424657ac19426..375fa732007cc 100644 --- a/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx +++ b/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx @@ -51,6 +51,8 @@ #include "GPUTRDTrackletWord.h" #include "GPUTRDInterfaces.h" #include "GPUTRDGeometry.h" +#include "GPUConstantMem.h" +#include 
"GPUTRDTrackerKernels.h" #ifdef ENABLE_UPGRADES #include "ITS3Reconstruction/IOUtils.h" diff --git a/GPU/GPUTracking/Base/GPUProcessor.cxx b/GPU/GPUTracking/Base/GPUProcessor.cxx index 8a18f71d535e3..82627fb00723c 100644 --- a/GPU/GPUTracking/Base/GPUProcessor.cxx +++ b/GPU/GPUTracking/Base/GPUProcessor.cxx @@ -14,7 +14,7 @@ #include "GPUProcessor.h" #include "GPUReconstruction.h" -#include "GPUReconstructionDeviceBase.h" +#include "GPUSettings.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index acca74e57a80e..c79c743e96ce5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -30,7 +30,9 @@ #include "GPUROOTDumpCore.h" #include "GPUConfigDump.h" #include "GPUChainTracking.h" +#include "GPUConstantMem.h" #include "GPUCommonHelpers.h" +#include "GPUSettings.h" #include "GPUMemoryResource.h" #include "GPUChain.h" @@ -75,10 +77,10 @@ constexpr GPUReconstruction::GeometryType GPUReconstruction::geometryType; static ptrdiff_t ptrDiff(void* a, void* b) { return (char*)a - (char*)b; } -GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mDeviceBackendSettings(cfg) +GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mGRPSettings(new GPUSettingsGRP), mDeviceBackendSettings(new GPUSettingsDeviceBackend(cfg)), mProcessingSettings(new GPUSettingsProcessing) { if (cfg.master) { - if (cfg.master->mDeviceBackendSettings.deviceType != cfg.deviceType) { + if (cfg.master->GetDeviceBackendSettings().deviceType != cfg.deviceType) { throw std::invalid_argument("device type of master and slave GPUReconstruction does not match"); } if (cfg.master->mMaster) { @@ -87,7 +89,7 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos mMaster = cfg.master; cfg.master->mSlaves.emplace_back(this); } - 
param().SetDefaults(&mGRPSettings); + param().SetDefaults(mGRPSettings.get()); mMemoryScalers.reset(new GPUMemorySizeScalers); for (uint32_t i = 0; i < NSECTORS; i++) { processors()->tpcTrackers[i].SetSector(i); // TODO: Move to a better place @@ -148,7 +150,7 @@ int32_t GPUReconstruction::Init() if (InitDevice()) { return 1; } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; } else { @@ -213,7 +215,7 @@ static uint32_t getDefaultNThreads() int32_t GPUReconstruction::InitPhaseBeforeDevice() { - if (mProcessingSettings.printSettings) { + if (GetProcessingSettings().printSettings) { if (mSlaves.size() || mMaster) { printf("\nConfig Dump %s\n", mMaster ? "Slave" : "Master"); } @@ -223,7 +225,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() break; } } - GPUConfigDump::dumpConfig(¶m().rec, &mProcessingSettings, chTrk ? chTrk->GetQAConfig() : nullptr, chTrk ? chTrk->GetEventDisplayConfig() : nullptr, &mDeviceBackendSettings, &mRecoSteps); + GPUConfigDump::dumpConfig(¶m().rec, mProcessingSettings.get(), chTrk ? chTrk->GetQAConfig() : nullptr, chTrk ? 
chTrk->GetEventDisplayConfig() : nullptr, mDeviceBackendSettings.get(), &mRecoSteps); } mRecoSteps.stepsGPUMask &= mRecoSteps.steps; mRecoSteps.stepsGPUMask &= AvailableGPURecoSteps(); @@ -231,95 +233,95 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mRecoSteps.stepsGPUMask.set((uint8_t)0); } - if (mProcessingSettings.forceMemoryPoolSize >= 1024 || mProcessingSettings.forceHostMemoryPoolSize >= 1024) { - mProcessingSettings.memoryAllocationStrategy = GPUMemoryResource::ALLOCATION_GLOBAL; + if (GetProcessingSettings().forceMemoryPoolSize >= 1024 || GetProcessingSettings().forceHostMemoryPoolSize >= 1024) { + mProcessingSettings->memoryAllocationStrategy = GPUMemoryResource::ALLOCATION_GLOBAL; } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_AUTO) { - mProcessingSettings.memoryAllocationStrategy = IsGPU() ? GPUMemoryResource::ALLOCATION_GLOBAL : GPUMemoryResource::ALLOCATION_INDIVIDUAL; + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_AUTO) { + mProcessingSettings->memoryAllocationStrategy = IsGPU() ? 
GPUMemoryResource::ALLOCATION_GLOBAL : GPUMemoryResource::ALLOCATION_INDIVIDUAL; } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - mProcessingSettings.forceMemoryPoolSize = mProcessingSettings.forceHostMemoryPoolSize = 0; + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + mProcessingSettings->forceMemoryPoolSize = mProcessingSettings->forceHostMemoryPoolSize = 0; } - if (mProcessingSettings.debugLevel >= 4) { - mProcessingSettings.keepAllMemory = true; + if (GetProcessingSettings().debugLevel >= 4) { + mProcessingSettings->keepAllMemory = true; } - if (mProcessingSettings.debugLevel >= 5 && mProcessingSettings.allocDebugLevel < 2) { - mProcessingSettings.allocDebugLevel = 2; + if (GetProcessingSettings().debugLevel >= 5 && GetProcessingSettings().allocDebugLevel < 2) { + mProcessingSettings->allocDebugLevel = 2; } - if (mProcessingSettings.eventDisplay || mProcessingSettings.keepAllMemory) { - mProcessingSettings.keepDisplayMemory = true; + if (GetProcessingSettings().eventDisplay || GetProcessingSettings().keepAllMemory) { + mProcessingSettings->keepDisplayMemory = true; } - if (mProcessingSettings.debugLevel < 6) { - mProcessingSettings.debugMask = 0; + if (GetProcessingSettings().debugLevel < 6) { + mProcessingSettings->debugMask = 0; } - if (mProcessingSettings.debugLevel < 1) { - mProcessingSettings.deviceTimers = false; + if (GetProcessingSettings().debugLevel < 1) { + mProcessingSettings->deviceTimers = false; } - if (mProcessingSettings.debugLevel > 0) { - mProcessingSettings.recoTaskTiming = true; + if (GetProcessingSettings().debugLevel > 0) { + mProcessingSettings->recoTaskTiming = true; } - if (mProcessingSettings.deterministicGPUReconstruction == -1) { - mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6; + if (GetProcessingSettings().deterministicGPUReconstruction == -1) { + 
mProcessingSettings->deterministicGPUReconstruction = GetProcessingSettings().debugLevel >= 6; } - if (mProcessingSettings.deterministicGPUReconstruction) { + if (GetProcessingSettings().deterministicGPUReconstruction) { #ifndef GPUCA_DETERMINISTIC_MODE GPUError("WARNING, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif - mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; + mProcessingSettings->overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; if (param().rec.tpc.looperInterpolationInExtraPass == -1) { param().rec.tpc.looperInterpolationInExtraPass = 0; } - if (mProcessingSettings.createO2Output > 1) { - mProcessingSettings.createO2Output = 1; + if (GetProcessingSettings().createO2Output > 1) { + mProcessingSettings->createO2Output = 1; } - mProcessingSettings.rtc.deterministic = 1; + mProcessingSettings->rtc.deterministic = 1; } else { #ifdef GPUCA_DETERMINISTIC_MODE GPUError("WARNING, compiled with GPUCA_DETERMINISTIC_MODE but deterministicGPUReconstruction not set, only compile-time determinism and deterministic math enforced, not fully deterministic!"); #endif } - if (mProcessingSettings.deterministicGPUReconstruction && mProcessingSettings.debugLevel >= 6) { - mProcessingSettings.nTPCClustererLanes = 1; + if (GetProcessingSettings().deterministicGPUReconstruction && GetProcessingSettings().debugLevel >= 6) { + mProcessingSettings->nTPCClustererLanes = 1; } - if (mProcessingSettings.createO2Output > 1 && mProcessingSettings.runQA && mProcessingSettings.qcRunFraction == 100.f) { - mProcessingSettings.createO2Output = 1; + if (GetProcessingSettings().createO2Output > 1 && GetProcessingSettings().runQA && GetProcessingSettings().qcRunFraction == 100.f) { + mProcessingSettings->createO2Output = 1; } - if (!mProcessingSettings.createO2Output || 
!IsGPU()) { - mProcessingSettings.clearO2OutputFromGPU = false; + if (!GetProcessingSettings().createO2Output || !IsGPU()) { + mProcessingSettings->clearO2OutputFromGPU = false; } if (!(mRecoSteps.stepsGPUMask & GPUDataTypes::RecoStep::TPCMerging)) { - mProcessingSettings.mergerSortTracks = false; + mProcessingSettings->mergerSortTracks = false; } - if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) { - mProcessingSettings.delayedOutput = false; + if (GetProcessingSettings().debugLevel > 3 || !IsGPU() || GetProcessingSettings().deterministicGPUReconstruction) { + mProcessingSettings->delayedOutput = false; } - if (!mProcessingSettings.rtc.enable) { - mProcessingSettings.rtc.optConstexpr = false; + if (!GetProcessingSettings().rtc.enable) { + mProcessingSettings->rtc.optConstexpr = false; } - mMemoryScalers->factor = mProcessingSettings.memoryScalingFactor; - mMemoryScalers->conservative = mProcessingSettings.conservativeMemoryEstimate; - mMemoryScalers->returnMaxVal = mProcessingSettings.forceMaxMemScalers != 0; - if (mProcessingSettings.forceMaxMemScalers > 1) { - mMemoryScalers->rescaleMaxMem(mProcessingSettings.forceMaxMemScalers); + mMemoryScalers->factor = GetProcessingSettings().memoryScalingFactor; + mMemoryScalers->conservative = GetProcessingSettings().conservativeMemoryEstimate; + mMemoryScalers->returnMaxVal = GetProcessingSettings().forceMaxMemScalers != 0; + if (GetProcessingSettings().forceMaxMemScalers > 1) { + mMemoryScalers->rescaleMaxMem(GetProcessingSettings().forceMaxMemScalers); } - if (mProcessingSettings.nHostThreads != -1 && mProcessingSettings.ompThreads != -1) { + if (GetProcessingSettings().nHostThreads != -1 && GetProcessingSettings().ompThreads != -1) { GPUFatal("Must not use both nHostThreads and ompThreads at the same time!"); - } else if (mProcessingSettings.ompThreads != -1) { - mProcessingSettings.nHostThreads = mProcessingSettings.ompThreads; + } else if 
(GetProcessingSettings().ompThreads != -1) { + mProcessingSettings->nHostThreads = GetProcessingSettings().ompThreads; GPUWarning("You are using the deprecated ompThreads option, please switch to nHostThreads!"); } - if (mProcessingSettings.nHostThreads <= 0) { - mProcessingSettings.nHostThreads = internal::getDefaultNThreads(); + if (GetProcessingSettings().nHostThreads <= 0) { + mProcessingSettings->nHostThreads = internal::getDefaultNThreads(); } else { - mProcessingSettings.autoAdjustHostThreads = false; + mProcessingSettings->autoAdjustHostThreads = false; } - mMaxHostThreads = mProcessingSettings.nHostThreads; + mMaxHostThreads = GetProcessingSettings().nHostThreads; if (mMaster == nullptr) { mThreading = std::make_shared(); mThreading->control = std::make_unique(tbb::global_control::max_allowed_parallelism, mMaxHostThreads); @@ -330,26 +332,26 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } mMaxBackendThreads = std::max(mMaxBackendThreads, mMaxHostThreads); if (IsGPU()) { - mNStreams = std::max(mProcessingSettings.nStreams, 3); + mNStreams = std::max(GetProcessingSettings().nStreams, 3); } - if (mProcessingSettings.nTPCClustererLanes == -1) { - mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSECTORS, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? GPUCA_NSECTORS : 4) : 1) : mMaxHostThreads)); + if (GetProcessingSettings().nTPCClustererLanes == -1) { + mProcessingSettings->nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSECTORS, GetProcessingSettings().inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? 
GPUCA_NSECTORS : 4) : 1) : mMaxHostThreads)); } - if (mProcessingSettings.overrideClusterizerFragmentLen == -1) { - mProcessingSettings.overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / mProcessingSettings.nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST; + if (GetProcessingSettings().overrideClusterizerFragmentLen == -1) { + mProcessingSettings->overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / GetProcessingSettings().nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST; } - if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSECTORS) { - GPUError("Invalid value for nTPCClustererLanes: %d", mProcessingSettings.nTPCClustererLanes); - mProcessingSettings.nTPCClustererLanes = GPUCA_NSECTORS; + if (GetProcessingSettings().nTPCClustererLanes > GPUCA_NSECTORS) { + GPUError("Invalid value for nTPCClustererLanes: %d", GetProcessingSettings().nTPCClustererLanes); + mProcessingSettings->nTPCClustererLanes = GPUCA_NSECTORS; } - if (mProcessingSettings.doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || mProcessingSettings.memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { + if (GetProcessingSettings().doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { GPUError("Must use double pipeline mode only with exactly one chain that must support it"); return 1; } - if (mMaster == nullptr && mProcessingSettings.doublePipeline) { + if (mMaster == nullptr && GetProcessingSettings().doublePipeline) { mPipelineContext.reset(new GPUReconstructionPipelineContext); } @@ -367,16 +369,16 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mDeviceMemorySize += memPrimary; mHostMemorySize += memPageLocked; } - if 
(mProcessingSettings.forceMemoryPoolSize && mProcessingSettings.forceMemoryPoolSize <= 2 && CanQueryMaxMemory()) { - mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize; - } else if (mProcessingSettings.forceMemoryPoolSize > 2) { - mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize; + if (GetProcessingSettings().forceMemoryPoolSize && GetProcessingSettings().forceMemoryPoolSize <= 2 && CanQueryMaxMemory()) { + mDeviceMemorySize = GetProcessingSettings().forceMemoryPoolSize; + } else if (GetProcessingSettings().forceMemoryPoolSize > 2) { + mDeviceMemorySize = GetProcessingSettings().forceMemoryPoolSize; if (!IsGPU() || mOutputControl.useInternal()) { mHostMemorySize = mDeviceMemorySize; } } - if (mProcessingSettings.forceHostMemoryPoolSize) { - mHostMemorySize = mProcessingSettings.forceHostMemoryPoolSize; + if (GetProcessingSettings().forceHostMemoryPoolSize) { + mHostMemorySize = GetProcessingSettings().forceHostMemoryPoolSize; } for (uint32_t i = 0; i < mProcessors.size(); i++) { @@ -399,7 +401,7 @@ int32_t GPUReconstruction::InitPhasePermanentMemory() int32_t GPUReconstruction::InitPhaseAfterDevice() { - if (mProcessingSettings.forceMaxMemScalers <= 1 && mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().forceMaxMemScalers <= 1 && GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { mMemoryScalers->rescaleMaxMem(IsGPU() ? 
mDeviceMemorySize : mHostMemorySize); } for (uint32_t i = 0; i < mChains.size(); i++) { @@ -446,7 +448,7 @@ int32_t GPUReconstruction::Exit() mChains.clear(); // Make sure we destroy a possible ITS GPU tracker before we call the destructors mHostConstantMem.reset(); // Reset these explicitly before the destruction of other members unloads the library - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { for (uint32_t i = 0; i < mMemoryResources.size(); i++) { if (mMemoryResources[i].mReuse >= 0) { continue; @@ -481,9 +483,38 @@ void GPUReconstruction::ComputeReuseMax(GPUProcessor* proc) } } +int16_t GPUReconstruction::RegisterMemoryAllocationHelper(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re) +{ + if (!(type & (GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU))) { + if ((type & GPUMemoryResource::MEMORY_SCRATCH) && !GetProcessingSettings().keepDisplayMemory) { // keepAllMemory --> keepDisplayMemory + type |= (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU ? 
GPUMemoryResource::MEMORY_HOST : GPUMemoryResource::MEMORY_GPU); + } else { + type |= GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU; + } + } + if (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU) { + type &= ~GPUMemoryResource::MEMORY_GPU; + } + mMemoryResources.emplace_back(proc, setPtr, (GPUMemoryResource::MemoryType)type, name); + if (mMemoryResources.size() >= 32768) { + throw std::bad_alloc(); + } + uint16_t retVal = mMemoryResources.size() - 1; + if (re.type != GPUMemoryReuse::NONE && !GetProcessingSettings().disableMemoryReuse) { + const auto& it = mMemoryReuse1to1.find(re.id); + if (it == mMemoryReuse1to1.end()) { + mMemoryReuse1to1[re.id] = {proc, retVal}; + } else { + mMemoryResources[retVal].mReuse = it->second.res[0]; + it->second.res.emplace_back(retVal); + } + } + return retVal; +} + size_t GPUReconstruction::AllocateRegisteredMemory(GPUProcessor* proc, bool resetCustom) { - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Allocating memory %p", (void*)proc); } size_t total = 0; @@ -496,7 +527,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(GPUProcessor* proc, bool rese } } } - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Allocating memory done"); } return total; @@ -504,7 +535,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(GPUProcessor* proc, bool rese size_t GPUReconstruction::AllocateRegisteredPermanentMemory() { - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Allocating Permanent Memory"); } int32_t total = 0; @@ -515,7 +546,7 @@ size_t GPUReconstruction::AllocateRegisteredPermanentMemory() } mHostMemoryPermanent = mHostMemoryPool; mDeviceMemoryPermanent = mDeviceMemoryPool; - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Permanent Memory Done"); } return total; @@ -534,7 +565,7 @@ size_t 
GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, GPUError("Insufficient reuse memory %lu < %lu (%s) (%s)", mMemoryResources[res->mReuse].mSize, retVal, res->mName, device); throw std::bad_alloc(); } - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Reused (" << device << ") " << res->mName << ": " << retVal << "\n"; } return retVal; @@ -568,7 +599,7 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? (memorysize + ptrDiff(memorypool, memorypoolend)) : ptrDiff(memorypool, memorybase)) << " > " << memorysize << "\n"; throw std::bad_alloc(); } - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (" << device << ") " << res->mName << ": " << retVal << " - available: " << (memorypoolend ? ptrDiff(memorypoolend, memorypool) : (memorysize - ptrDiff(memorypool, memorybase))) << "\n"; } return retVal; @@ -576,7 +607,7 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUOutputControl* control, GPUReconstruction* recPool) { - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) { if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { if (res->mPtrDevice && res->mReuse < 0) { operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); @@ -593,7 +624,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, } res->mPtr = GPUProcessor::alignPointer(res->mPtrDevice); 
res->SetPointers(res->mPtr); - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << (res->mReuse >= 0 ? "Reused " : "Allocated ") << res->mName << ": " << res->mSize << "\n"; } if (res->mType & GPUMemoryResource::MEMORY_STACK) { @@ -612,13 +643,13 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, if (IsGPU() && res->mOverrideSize < GPUCA_BUFFER_ALIGNMENT) { res->mOverrideSize = GPUCA_BUFFER_ALIGNMENT; } - if ((!IsGPU() || (res->mType & GPUMemoryResource::MEMORY_HOST) || mProcessingSettings.keepDisplayMemory) && !(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { // keepAllMemory --> keepDisplayMemory + if ((!IsGPU() || (res->mType & GPUMemoryResource::MEMORY_HOST) || GetProcessingSettings().keepDisplayMemory) && !(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { // keepAllMemory --> keepDisplayMemory if (control && control->useExternal()) { if (control->allocator) { res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize); res->mPtr = control->allocator(CAMath::nextMultipleOf(res->mSize)); res->mSize = std::max(ptrDiff(res->SetPointers(res->mPtr), res->mPtr), res->mOverrideSize); - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (from callback) " << res->mName << ": " << res->mSize << "\n"; } } else { @@ -676,7 +707,7 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type) if (type != GPUMemoryResource::MEMORY_HOST && (!IsGPU() || type != GPUMemoryResource::MEMORY_GPU)) { throw std::runtime_error("Requested invalid memory typo for unmanaged allocation"); } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]); return 
GPUProcessor::alignPointer(mUnmanagedChunks.back().get()); } else { @@ -689,7 +720,7 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type) throw std::bad_alloc(); } UpdateMaxMemoryUsed(); - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? "gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n"; } return retVal; @@ -711,7 +742,7 @@ void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size) throw std::bad_alloc(); } UpdateMaxMemoryUsed(); - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (volatile GPU): " << size << " - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n"; } @@ -773,10 +804,10 @@ void GPUReconstruction::FreeRegisteredMemory(int16_t ires) void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res) { - if (mProcessingSettings.allocDebugLevel >= 2 && (res->mPtr || res->mPtrDevice)) { + if (GetProcessingSettings().allocDebugLevel >= 2 && (res->mPtr || res->mPtrDevice)) { std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n"; } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) { operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mPtr = nullptr; @@ -789,7 +820,7 @@ void GPUReconstruction::ReturnVolatileDeviceMemory() mDeviceMemoryPool = mVolatileMemoryStart; mVolatileMemoryStart = nullptr; } - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) 
<< "\n"; } } @@ -807,7 +838,7 @@ void GPUReconstruction::PushNonPersistentMemory(uint64_t tag) void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) { - if (mProcessingSettings.keepDisplayMemory || mProcessingSettings.disableMemoryReuse) { + if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().disableMemoryReuse) { return; } if (mNonPersistentMemoryStack.size() == 0) { @@ -816,7 +847,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) if (tag != 0 && std::get<3>(mNonPersistentMemoryStack.back()) != tag) { GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str()); } - if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) { + if ((GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) { printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); PrintMemoryOverview(); printf("%76s", ""); @@ -872,7 +903,7 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs) mNonPersistentMemoryStack.clear(); mNonPersistentIndividualAllocations.clear(); mVolatileMemoryStart = nullptr; - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? 
mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); @@ -895,7 +926,7 @@ void GPUReconstruction::PrintMemoryMax() void GPUReconstruction::PrintMemoryOverview() { - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { printf("Memory Allocation: Host %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd), Device %'13zd / %'13zu, (Permanent %'13zd, Data %'13zd, Scratch %'13zd) %zu chunks\n", ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), ptrDiff(mHostMemoryPool, mHostMemoryPermanent), ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), ptrDiff(mDeviceMemoryPool, mDeviceMemoryPermanent), ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), @@ -934,7 +965,7 @@ void GPUReconstruction::PrintMemoryStatistics() int32_t GPUReconstruction::registerMemoryForGPU(const void* ptr, size_t size) { - if (mProcessingSettings.noGPUMemoryRegistration) { + if (GetProcessingSettings().noGPUMemoryRegistration) { return 0; } int32_t retVal = registerMemoryForGPU_internal(ptr, size); @@ -946,7 +977,7 @@ int32_t GPUReconstruction::registerMemoryForGPU(const void* ptr, size_t size) int32_t GPUReconstruction::unregisterMemoryForGPU(const void* ptr) { - if (mProcessingSettings.noGPUMemoryRegistration) { + if (GetProcessingSettings().noGPUMemoryRegistration) { return 0; } const auto& pos = mRegisteredMemoryPtrs.find(ptr); @@ -982,10 +1013,10 @@ int32_t GPUReconstruction::getGeneralStepNum(GeneralStep step, bool validCheck) void GPUReconstruction::RunPipelineWorker() { - 
if (!mInitialized || !mProcessingSettings.doublePipeline || mMaster != nullptr || !mSlaves.size()) { + if (!mInitialized || !GetProcessingSettings().doublePipeline || mMaster != nullptr || !mSlaves.size()) { throw std::invalid_argument("Cannot start double pipeline mode"); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Pipeline worker started"); } bool terminate = false; @@ -1011,7 +1042,7 @@ void GPUReconstruction::RunPipelineWorker() } q->c.notify_one(); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Pipeline worker ended"); } } @@ -1107,7 +1138,7 @@ void GPUReconstruction::DumpSettings(const char* dir) std::string f; f = dir; f += "settings.dump"; - DumpStructToFile(&mGRPSettings, f.c_str()); + DumpStructToFile(mGRPSettings.get(), f.c_str()); for (uint32_t i = 0; i < mChains.size(); i++) { mChains[i]->DumpSettings(dir); } @@ -1121,11 +1152,11 @@ void GPUReconstruction::UpdateDynamicSettings(const GPUSettingsRecDynamic* d) void GPUReconstruction::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p, const GPUSettingsRecDynamic* d) { if (g) { - mGRPSettings = *g; + *mGRPSettings = *g; } if (p) { - mProcessingSettings.debugLevel = p->debugLevel; - mProcessingSettings.resetTimers = p->resetTimers; + mProcessingSettings->debugLevel = p->debugLevel; + mProcessingSettings->resetTimers = p->resetTimers; } GPURecoStepConfiguration* w = nullptr; if (mRecoSteps.steps.isSet(GPUDataTypes::RecoStep::TPCdEdx)) { @@ -1142,11 +1173,11 @@ int32_t GPUReconstruction::ReadSettings(const char* dir) std::string f; f = dir; f += "settings.dump"; - new (&mGRPSettings) GPUSettingsGRP; - if (ReadStructFromFile(f.c_str(), &mGRPSettings)) { + new (mGRPSettings.get()) GPUSettingsGRP; + if (ReadStructFromFile(f.c_str(), mGRPSettings.get())) { return 1; } - param().UpdateSettings(&mGRPSettings); + param().UpdateSettings(mGRPSettings.get()); for (uint32_t i = 0; 
i < mChains.size(); i++) { mChains[i]->ReadSettings(dir); } @@ -1173,9 +1204,9 @@ void GPUReconstruction::SetSettings(const GPUSettingsGRP* grp, const GPUSettings GPUError("Cannot update settings while initialized"); throw std::runtime_error("Settings updated while initialized"); } - mGRPSettings = *grp; + *mGRPSettings = *grp; if (proc) { - mProcessingSettings = *proc; + *mProcessingSettings = *proc; } if (workflow) { mRecoSteps.steps = workflow->steps; @@ -1183,7 +1214,7 @@ void GPUReconstruction::SetSettings(const GPUSettingsGRP* grp, const GPUSettings mRecoSteps.inputs = workflow->inputs; mRecoSteps.outputs = workflow->outputs; } - param().SetDefaults(&mGRPSettings, rec, proc, workflow); + param().SetDefaults(mGRPSettings.get(), rec, proc, workflow); } void GPUReconstruction::SetOutputControl(void* ptr, size_t size) @@ -1193,10 +1224,14 @@ void GPUReconstruction::SetOutputControl(void* ptr, size_t size) SetOutputControl(outputControl); } -void GPUReconstruction::SetInputControl(void* ptr, size_t size) -{ - mInputControl.set(ptr, size); -} +void GPUReconstruction::SetInputControl(void* ptr, size_t size) { mInputControl.set(ptr, size); } +GPUReconstruction::DeviceType GPUReconstruction::GetDeviceType() const { return (DeviceType)GetDeviceBackendSettings().deviceType; } +const GPUParam& GPUReconstruction::GetParam() const { return mHostConstantMem->param; } +void GPUReconstruction::SetResetTimers(bool reset) { mProcessingSettings->resetTimers = reset; } +void GPUReconstruction::SetDebugLevelTmp(int32_t level) { mProcessingSettings->debugLevel = level; } +GPUParam& GPUReconstruction::param() { return mHostConstantMem->param; } +const GPUTrackingInOutPointers GPUReconstruction::GetIOPtrs() const { return mHostConstantMem->ioPtrs; } +const GPUCalibObjectsConst& GPUReconstruction::GetCalib() const { return processors()->calibObjects; } ThrustVolatileAllocator::ThrustVolatileAllocator(GPUReconstruction* r) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h 
b/GPU/GPUTracking/Base/GPUReconstruction.h index 23fb6e4d9ff06..b6256f7f8ad82 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -25,13 +25,13 @@ #include #include -#include "GPUTRDDef.h" -#include "GPUParam.h" -#include "GPUSettings.h" -#include "GPUOutputControl.h" +#include "GPUDataTypes.h" #include "GPUMemoryResource.h" -#include "GPUConstantMem.h" -#include "GPULogging.h" +#include "GPUOutputControl.h" + +/*#include "GPUParam.h" +#include "GPUSettings.h" +#include "GPULogging.h"*/ namespace o2::its { @@ -49,6 +49,13 @@ struct GPUReconstructionThreading; class GPUROOTDumpCore; class ThrustVolatileAllocator; struct GPUDefParameters; +class GPUMemoryResource; +struct GPUSettingsDeviceBackend; +struct GPUSettingsGRP; +struct GPUSettingsProcessing; +struct GPUSettingsRec; +struct GPUSettingsRecDynamic; +struct GPUMemoryReuse; namespace gpu_reconstruction_kernels { @@ -186,18 +193,20 @@ class GPUReconstruction bool slavesExist() { return mSlaves.size() || mMaster; } // Getters / setters for parameters - DeviceType GetDeviceType() const { return (DeviceType)mDeviceBackendSettings.deviceType; } + DeviceType GetDeviceType() const; bool IsGPU() const { return GetDeviceType() != DeviceType::INVALID_DEVICE && GetDeviceType() != DeviceType::CPU; } - const GPUParam& GetParam() const { return mHostConstantMem->param; } + const GPUParam& GetParam() const; const GPUConstantMem& GetConstantMem() const { return *mHostConstantMem; } - const GPUSettingsGRP& GetGRPSettings() const { return mGRPSettings; } - const GPUSettingsDeviceBackend& GetDeviceBackendSettings() { return mDeviceBackendSettings; } - const GPUSettingsProcessing& GetProcessingSettings() const { return mProcessingSettings; } + const GPUTrackingInOutPointers GetIOPtrs() const; + const GPUSettingsGRP& GetGRPSettings() const { return *mGRPSettings; } + const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return *mDeviceBackendSettings; } + const 
GPUSettingsProcessing& GetProcessingSettings() const { return *mProcessingSettings; } + const GPUCalibObjectsConst& GetCalib() const; bool IsInitialized() const { return mInitialized; } void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr); void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr); - void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() - void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings() + void SetResetTimers(bool reset); // May update also after Init() + void SetDebugLevelTmp(int32_t level); // Temporarily, before calling SetSettings() void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr); void UpdateDynamicSettings(const GPUSettingsRecDynamic* d); void SetOutputControl(const GPUOutputControl& v) { mOutputControl = v; } @@ -272,6 +281,7 @@ class GPUReconstruction size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr* mem, InOutPointerType type, T** nonConstPtrs = nullptr); template T* AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr& u); + int16_t RegisterMemoryAllocationHelper(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re); // Private helper functions to dump / load flat objects template @@ -292,17 +302,17 @@ class GPUReconstruction // Pointers to tracker classes GPUConstantMem* processors() { return mHostConstantMem.get(); } const GPUConstantMem* processors() const { return mHostConstantMem.get(); } - GPUParam& param() { return mHostConstantMem->param; } + GPUParam& param(); std::unique_ptr mHostConstantMem; GPUConstantMem* mDeviceConstantMem = nullptr; // Settings - GPUSettingsGRP 
mGRPSettings; // Global Run Parameters - GPUSettingsDeviceBackend mDeviceBackendSettings; // Processing Parameters (at constructor level) - GPUSettingsProcessing mProcessingSettings; // Processing Parameters (at init level) - GPUOutputControl mOutputControl; // Controls the output of the individual components - GPUOutputControl mInputControl; // Prefefined input memory location for reading standalone dumps - std::unique_ptr mMemoryScalers; // Scalers how much memory will be needed + std::unique_ptr mGRPSettings; // Global Run Parameters + std::unique_ptr mDeviceBackendSettings; // Processing Parameters (at constructor level) + std::unique_ptr mProcessingSettings; // Processing Parameters (at init level) + GPUOutputControl mOutputControl; // Controls the output of the individual components + GPUOutputControl mInputControl; // Prefefined input memory location for reading standalone dumps + std::unique_ptr mMemoryScalers; // Scalers how much memory will be needed GPURecoStepConfiguration mRecoSteps; @@ -392,35 +402,6 @@ class GPUReconstruction static GPUReconstruction* GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg); }; -template -inline T* GPUReconstruction::AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr& u) -{ - if (n == 0) { - u.reset(nullptr); - return nullptr; - } - T* retVal; - if (mInputControl.useExternal()) { - u.reset(nullptr); - mInputControl.checkCurrent(); - GPUProcessor::computePointerWithAlignment(mInputControl.ptrCurrent, retVal, n); - if ((size_t)((char*)mInputControl.ptrCurrent - (char*)mInputControl.ptrBase) > mInputControl.size) { - throw std::bad_alloc(); - } - } else { - u.reset(new T[n]); - retVal = u.get(); - if (mProcessingSettings.registerStandaloneInputMemory) { - if (registerMemoryForGPU(u.get(), n * sizeof(T))) { - GPUError("Error registering memory for GPU: %p - %ld bytes\n", (void*)u.get(), (int64_t)(n * sizeof(T))); - throw std::bad_alloc(); - } - } - } - ptr = retVal; - return retVal; -} - template 
inline T* GPUReconstruction::AddChain(Args... args) { @@ -431,31 +412,7 @@ inline T* GPUReconstruction::AddChain(Args... args) template inline int16_t GPUReconstruction::RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re) { - if (!(type & (GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU))) { - if ((type & GPUMemoryResource::MEMORY_SCRATCH) && !mProcessingSettings.keepDisplayMemory) { // keepAllMemory --> keepDisplayMemory - type |= (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU ? GPUMemoryResource::MEMORY_HOST : GPUMemoryResource::MEMORY_GPU); - } else { - type |= GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU; - } - } - if (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU) { - type &= ~GPUMemoryResource::MEMORY_GPU; - } - mMemoryResources.emplace_back(proc, static_cast(setPtr), (GPUMemoryResource::MemoryType)type, name); - if (mMemoryResources.size() >= 32768) { - throw std::bad_alloc(); - } - uint16_t retVal = mMemoryResources.size() - 1; - if (re.type != GPUMemoryReuse::NONE && !mProcessingSettings.disableMemoryReuse) { - const auto& it = mMemoryReuse1to1.find(re.id); - if (it == mMemoryReuse1to1.end()) { - mMemoryReuse1to1[re.id] = {proc, retVal}; - } else { - mMemoryResources[retVal].mReuse = it->second.res[0]; - it->second.res.emplace_back(retVal); - } - } - return retVal; + return RegisterMemoryAllocationHelper(proc, static_cast(setPtr), type, name, re); } template @@ -471,7 +428,7 @@ inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate) { static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derived class"); if (allocate) { - proc->SetMaxData(mHostConstantMem->ioPtrs); + proc->SetMaxData(GetIOPtrs()); } if (proc->mGPUProcessorType != GPUProcessor::PROCESSOR_TYPE_DEVICE && proc->mLinkedProcessor) { std::memcpy((void*)proc->mLinkedProcessor, (const void*)proc, sizeof(*proc)); diff --git 
a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 39507beda8a55..2453ce4a2328f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -30,19 +30,18 @@ #include "GPUTRDTrackletLabels.h" #include "GPUMemoryResource.h" #include "GPUConstantMem.h" +#include "GPULogging.h" #include "GPUMemorySizeScalers.h" +#include "GPUReconstructionProcessingKernels.inc" + #include #include -#define GPUCA_LOGGING_PRINTF -#include "GPULogging.h" - #ifndef _WIN32 #include #endif using namespace o2::gpu; -using namespace o2::gpu::gpu_reconstruction_kernels; constexpr GPUReconstructionCPU::krnlRunRange GPUReconstructionCPU::krnlRunRangeNone; constexpr GPUReconstructionCPU::krnlEvent GPUReconstructionCPU::krnlEventNone; @@ -55,7 +54,7 @@ GPUReconstructionCPU::~GPUReconstructionCPU() } template -inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args) { auto& x = _xyz.x; auto& y = _xyz.y; @@ -67,7 +66,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu } int32_t nThreads = getNKernelHostThreads(false); if (nThreads > 1) { - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { printf("Running %d Threads\n", mThreading->activeThreads->max_concurrency()); } tbb::this_task_arena::isolate([&] { @@ -89,7 +88,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu } template <> -inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { int32_t nThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); if (nThreads > 1) { @@ -110,7 +109,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal -void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs& args) { #pragma GCC diagnostic push #if defined(__clang__) @@ -121,14 +120,14 @@ void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs -gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu) +GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu) { if (gpu == -1) { gpu = IsGPU(); } const auto num = GetKernelNum(); const auto* p = gpu ? mParDevice : mParCPU; - gpu_reconstruction_kernels::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]}; + GPUReconstructionProcessing::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]}; if (ret.nThreads == 0) { ret.nThreads = gpu ? 
mThreadCount : 1u; } @@ -138,9 +137,9 @@ gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProper return ret; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ - template void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args); \ - template krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ + template void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs& args); \ + template GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -169,7 +168,7 @@ size_t GPUReconstructionCPU::TransferMemoryResourcesHelper(GPUProcessor* proc, i if (!(res.mType & GPUMemoryResource::MEMORY_GPU) || (res.mType & GPUMemoryResource::MEMORY_CUSTOM_TRANSFER)) { continue; } - if (!mProcessingSettings.keepAllMemory && !all && (res.mType & exc) && !(res.mType & inc)) { + if (!GetProcessingSettings().keepAllMemory && !all && (res.mType & exc) && !(res.mType & inc)) { continue; } if (toGPU) { @@ -197,7 +196,7 @@ int32_t GPUReconstructionCPU::InitDevice() { mActiveHostKernelThreads = mMaxHostThreads; mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { if (mMaster == nullptr) { if (mDeviceMemorySize > mHostMemorySize) { mHostMemorySize = mDeviceMemorySize; @@ -207,7 +206,7 @@ int32_t GPUReconstructionCPU::InitDevice() mHostMemoryPermanent = mHostMemoryBase; ClearAllocatedMemory(); } - if (mProcessingSettings.inKernelParallel) { + if (GetProcessingSettings().inKernelParallel) { mBlockCount = mMaxHostThreads; } mProcShadow.mProcessorsProc = processors(); @@ -216,7 +215,7 @@ int32_t 
GPUReconstructionCPU::InitDevice() int32_t GPUReconstructionCPU::ExitDevice() { - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { if (mMaster == nullptr) { operator delete(mHostMemoryBase, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } @@ -232,13 +231,13 @@ int32_t GPUReconstructionCPU::RunChains() mStatNEvents++; mNEventsProcessed++; - if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + if (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) { printf("Allocated memory when starting processing %34s", ""); PrintMemoryOverview(); } mTimerTotal.Start(); const std::clock_t cpuTimerStart = std::clock(); - if (mProcessingSettings.doublePipeline) { + if (GetProcessingSettings().doublePipeline) { int32_t retVal = EnqueuePipeline(); if (retVal) { return retVal; @@ -259,7 +258,7 @@ int32_t GPUReconstructionCPU::RunChains() } mTimerTotal.Stop(); mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; - if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + if (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) { printf("Allocated memory when ending processing %36s", ""); PrintMemoryOverview(); } @@ -281,7 +280,7 @@ int32_t GPUReconstructionCPU::RunChains() for (int32_t j = 0; j < mTimers[i]->num; j++) { HighResTimer& timer = mTimers[i]->timer[j]; time += timer.GetElapsedTime(); - if (mProcessingSettings.resetTimers) { + if (GetProcessingSettings().resetTimers) { timer.Reset(); } } @@ -297,7 +296,7 @@ int32_t GPUReconstructionCPU::RunChains() snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count); } printf("Execution Time: Task (%c %8ux): %50s Time: 
%'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth); - if (mProcessingSettings.resetTimers) { + if (GetProcessingSettings().resetTimers) { mTimers[i]->count = 0; mTimers[i]->memSize = 0; } @@ -317,7 +316,7 @@ int32_t GPUReconstructionCPU::RunChains() printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost); } - if (mProcessingSettings.resetTimers) { + if (GetProcessingSettings().resetTimers) { mTimersRecoSteps[i].bytesToGPU = mTimersRecoSteps[i].bytesToHost = 0; mTimersRecoSteps[i].timerToGPU.Reset(); mTimersRecoSteps[i].timerToHost.Reset(); @@ -340,7 +339,7 @@ int32_t GPUReconstructionCPU::RunChains() } else if (GetProcessingSettings().debugLevel >= 0) { GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime, nEventReport.c_str()); } - if (mProcessingSettings.resetTimers) { + if (GetProcessingSettings().resetTimers) { mStatNEvents = 0; mStatCPUTime = 0; mTimerTotal.Reset(); @@ -366,7 +365,7 @@ void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, cons if (!((size_t)¶m().occupancyTotal - (size_t)¶m().occupancyMap == sizeof(param().occupancyMap) && sizeof(param().occupancyMap) == sizeof(size_t) && sizeof(param().occupancyTotal) < sizeof(size_t))) { throw std::runtime_error("occupancy data not consecutive in GPUParam"); } - const auto threadContext = GetThreadContext(); + const auto holdContext = GetThreadContext(); size_t tmp[2] = {(size_t)mapGPU, 0}; memcpy(&tmp[1], &occupancyTotal, sizeof(occupancyTotal)); 
WriteToConstantMemory((char*)&processors()->param.occupancyMap - (char*)processors(), &tmp, sizeof(param().occupancyMap) + sizeof(param().occupancyTotal), stream); diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index b37bf2b75f01c..d0d8b05c4af0e 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -16,14 +16,9 @@ #define GPURECONSTRUCTIONICPU_H #include "GPUReconstructionProcessing.h" -#include "GPUConstantMem.h" #include #include -#include "GPUGeneralKernels.h" -#include "GPUReconstructionKernelIncludes.h" -#include "GPUReconstructionKernels.h" - namespace Ort { struct SessionOptions; @@ -32,20 +27,7 @@ struct SessionOptions; namespace o2::gpu { -class GPUReconstructionCPUBackend : public GPUReconstructionProcessing -{ - public: - ~GPUReconstructionCPUBackend() override = default; - - protected: - GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing(cfg) {} - template - void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs& args); - template - void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args); -}; - -class GPUReconstructionCPU : public GPUReconstructionKernels +class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface { friend GPUReconstruction* GPUReconstruction::GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg); friend class GPUChain; @@ -55,10 +37,10 @@ class GPUReconstructionCPU : public GPUReconstructionKernels - void runKernel(krnlSetup&& setup, Args&&... 
args); template - gpu_reconstruction_kernels::krnlProperties getKernelProperties(int gpu = -1); + krnlProperties getKernelProperties(int gpu = -1); + template + void runKernelBackend(const krnlSetupArgs& args); virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false); int32_t GPUStuck() { return mGPUStuck; } @@ -75,21 +57,10 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ - { \ - krnlSetupArgs args(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward)); \ - const uint32_t num = GetKernelNum(); \ - if (cpuFallback) { \ - GPUReconstructionCPU::runKernelImpl(num, &args); \ - } else { \ - runKernelImpl(num, &args); \ - } \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL + GPUReconstructionCPU(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface(cfg) {} + + template + void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override { return 0; } int32_t unregisterMemoryForGPU_internal(const void* ptr) override { return 0; } @@ -132,72 +103,10 @@ class GPUReconstructionCPU : public GPUReconstructionKernels + void runKernelInterface(krnlSetup&& setup, Args const&... args); }; -template -inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) -{ - HighResTimer* t = nullptr; - GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep(); - if (myStep == GPUDataTypes::RecoStep::NoRecoStep) { - throw std::runtime_error("Failure running general kernel without defining RecoStep"); - } - int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 
2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0; - uint32_t& nThreads = setup.x.nThreads; - uint32_t& nBlocks = setup.x.nBlocks; - const uint32_t stream = setup.x.stream; - auto prop = getKernelProperties(); - const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads; - const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount)); - if (nBlocks == (uint32_t)-1) { - nBlocks = (nThreads + autoThreads - 1) / autoThreads; - nThreads = autoThreads; - } else if (nBlocks == (uint32_t)-2) { - nBlocks = nThreads; - nThreads = autoThreads; - } else if (nBlocks == (uint32_t)-3) { - nBlocks = autoBlocks; - nThreads = autoThreads; - } else if ((int32_t)nThreads < 0) { - nThreads = cpuFallback ? 1 : -nThreads; - } - if (nThreads > GPUCA_MAX_THREADS) { - throw std::runtime_error("GPUCA_MAX_THREADS exceeded"); - } - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str()); - } - if (nThreads == 0 || nBlocks == 0) { - return; - } - if (mProcessingSettings.debugLevel >= 1) { - t = &getKernelTimer(myStep, !IsGPU() || cpuFallback ? getHostThreadIndex() : stream); - if ((!mProcessingSettings.deviceTimers || !IsGPU() || cpuFallback) && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) { - t->Start(); - } - } - double deviceTimerTime = 0.; - runKernelImplWrapper(gpu_reconstruction_kernels::classArgument(), cpuFallback, deviceTimerTime, std::forward(setup), std::forward(args)...); - if (GPUDebug(GetKernelName(), stream, mProcessingSettings.serializeGPU & 1)) { - throw std::runtime_error("kernel failure"); - } - if (mProcessingSettings.debugLevel >= 1) { - if (t) { - if (deviceTimerTime != 0.) 
{ - t->AddTime(deviceTimerTime); - if (t->IsRunning()) { - t->Abort(); - } - } else if (t->IsRunning()) { - t->Stop(); - } - } - if (CheckErrorCodes(cpuFallback) && !mProcessingSettings.ignoreNonFatalGPUErrors) { - throw std::runtime_error("kernel error code"); - } - } -} - } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h b/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h new file mode 100644 index 0000000000000..837516a93b6ae --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h @@ -0,0 +1,98 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionCPUKernels.h +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONICPUKERNELS_H +#define GPURECONSTRUCTIONICPUKERNELS_H + +#include "GPUReconstructionCPU.h" +#include "GPUSettings.h" +#include "GPULogging.h" + +namespace o2::gpu +{ + +template +inline void GPUReconstructionCPU::runKernelInterface(krnlSetup&& setup, Args const&... args) +{ + HighResTimer* t = nullptr; + GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep(); + if (myStep == GPUDataTypes::RecoStep::NoRecoStep) { + throw std::runtime_error("Failure running general kernel without defining RecoStep"); + } + int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 
2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0; + uint32_t& nThreads = setup.x.nThreads; + uint32_t& nBlocks = setup.x.nBlocks; + const uint32_t stream = setup.x.stream; + auto prop = getKernelProperties(); + const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads; + const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount)); + if (nBlocks == (uint32_t)-1) { + nBlocks = (nThreads + autoThreads - 1) / autoThreads; + nThreads = autoThreads; + } else if (nBlocks == (uint32_t)-2) { + nBlocks = nThreads; + nThreads = autoThreads; + } else if (nBlocks == (uint32_t)-3) { + nBlocks = autoBlocks; + nThreads = autoThreads; + } else if ((int32_t)nThreads < 0) { + nThreads = cpuFallback ? 1 : -nThreads; + } + if (nThreads > GPUCA_MAX_THREADS) { + throw std::runtime_error("GPUCA_MAX_THREADS exceeded"); + } + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : (cpuFallback ? "CPU (fallback)" : mDeviceName.c_str())); + } + if (nThreads == 0 || nBlocks == 0) { + return; + } + if (GetProcessingSettings().debugLevel >= 1) { + t = &getKernelTimer(myStep, !IsGPU() || cpuFallback ? 
getHostThreadIndex() : stream); + if ((!GetProcessingSettings().deviceTimers || !IsGPU() || cpuFallback) && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) { + t->Start(); + } + } + double deviceTimerTime = 0.; + krnlSetupArgs argPack{{}, {{setup.x, setup.y, setup.z}, deviceTimerTime}, {args...}}; + const uint32_t num = GetKernelNum(); + if (cpuFallback) { + GPUReconstructionCPU::runKernelVirtual(num, &argPack); + } else { + runKernelVirtual(num, &argPack); + } + + if (GPUDebug(GetKernelName(), stream, GetProcessingSettings().serializeGPU & 1)) { + throw std::runtime_error("kernel failure"); + } + if (GetProcessingSettings().debugLevel >= 1) { + if (t) { + if (deviceTimerTime != 0.) { + t->AddTime(deviceTimerTime); + if (t->IsRunning()) { + t->Abort(); + } + } else if (t->IsRunning()) { + t->Stop(); + } + } + if (CheckErrorCodes(cpuFallback) && !GetProcessingSettings().ignoreNonFatalGPUErrors) { + throw std::runtime_error("kernel error code"); + } + } +} + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index e12ca7ec601ad..2dec88393f632 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -25,7 +25,7 @@ #include "GPUO2DataTypes.h" #include "GPUDataTypes.h" #include "GPUTPCGeometry.h" -#include "AliHLTTPCRawCluster.h" +#include "AliHLTTPCRawCluster.h" // TODO: Is this still needed at all, or can it be removed? 
#include "GPUParam.h" #include "GPULogging.h" #include diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index b389e99a0b2bb..9962bdf3922c1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -14,6 +14,7 @@ #include "GPUReconstructionDeviceBase.h" #include "GPUReconstructionIncludes.h" +#include "GPUConstantMem.h" #include "GPUTPCTracker.h" @@ -93,21 +94,21 @@ int32_t GPUReconstructionDeviceBase::InitDevice() // CPU_SET(0, &mask); // sched_setaffinity(0, sizeof(mask), &mask); - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { GPUError("Individual memory allocation strategy unsupported for device\n"); return (1); } - if (mProcessingSettings.nStreams > GPUCA_MAX_STREAMS) { - GPUError("Too many straems requested %d > %d\n", mProcessingSettings.nStreams, GPUCA_MAX_STREAMS); + if (GetProcessingSettings().nStreams > GPUCA_MAX_STREAMS) { + GPUError("Too many straems requested %d > %d\n", GetProcessingSettings().nStreams, GPUCA_MAX_STREAMS); return (1); } void* semLock = nullptr; - if (mProcessingSettings.globalInitMutex && GetGlobalLock(semLock)) { + if (GetProcessingSettings().globalInitMutex && GetGlobalLock(semLock)) { return (1); } - if (mProcessingSettings.deviceTimers) { + if (GetProcessingSettings().deviceTimers) { AddGPUEvents(mDebugEvents); } @@ -117,7 +118,7 @@ int32_t GPUReconstructionDeviceBase::InitDevice() return (1); } - if (mProcessingSettings.globalInitMutex) { + if (GetProcessingSettings().globalInitMutex) { ReleaseGlobalLock(semLock); } @@ -129,7 +130,7 @@ int32_t GPUReconstructionDeviceBase::InitDevice() mProcShadow.mMemoryResProcessors = RegisterMemoryAllocation(&mProcShadow, &GPUProcessorProcessors::SetPointersDeviceProcessor, 
GPUMemoryResource::MEMORY_PERMANENT | GPUMemoryResource::MEMORY_HOST, "Processors"); AllocateRegisteredMemory(mProcShadow.mMemoryResProcessors); - if (mMaster == nullptr || mProcessingSettings.debugLevel >= 2) { + if (mMaster == nullptr || GetProcessingSettings().debugLevel >= 2) { GPUInfo("GPU Tracker initialization successfull"); // Verbosity reduced because GPU backend will print GPUImportant message! } @@ -186,13 +187,15 @@ void GPUReconstructionDeviceBase::runConstantRegistrators() size_t GPUReconstructionDeviceBase::TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) { if (!(res->Type() & GPUMemoryResource::MEMORY_GPU)) { - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Skipped transfer of non-GPU memory resource: %s", res->Name()); } return 0; } - if (mProcessingSettings.debugLevel >= 3 && (strcmp(res->Name(), "ErrorCodes") || mProcessingSettings.debugLevel >= 4)) { + if (GetProcessingSettings().debugLevel >= 3 && (strcmp(res->Name(), "ErrorCodes") || GetProcessingSettings().debugLevel >= 4)) { GPUInfo("Copying to %s: %s - %ld bytes", toGPU ? 
"GPU" : "Host", res->Name(), (int64_t)res->Size()); } return GPUMemCpy(dst, src, res->Size(), stream, toGPU, ev, evList, nEvents); } + +const GPUParam* GPUReconstructionDeviceBase::DeviceParam() const { return &mDeviceConstantMem->param; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index f0e19f588e0f1..c8288f978f6ae 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -22,16 +22,12 @@ namespace o2::gpu { -#if !(defined(__CLING__) || defined(__ROOTCLING__) || defined(G__ROOT)) -extern template class GPUReconstructionKernels; -#endif - class GPUReconstructionDeviceBase : public GPUReconstructionCPU { public: ~GPUReconstructionDeviceBase() override; - const GPUParam* DeviceParam() const { return &mDeviceConstantMem->param; } + const GPUParam* DeviceParam() const; struct deviceConstantMemRegistration { deviceConstantMemRegistration(void* (*reg)()) { @@ -51,8 +47,6 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); - virtual const GPUTPCTracker* CPUTracker(int32_t iSector) { return &processors()->tpcTrackers[iSector]; } - int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override = 0; size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) override; size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override = 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionIO.h b/GPU/GPUTracking/Base/GPUReconstructionIO.h index 2208c15846e09..810ebfffe1703 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIO.h +++ 
b/GPU/GPUTracking/Base/GPUReconstructionIO.h @@ -16,10 +16,40 @@ #define GPURECONSTRUCTIONIO_H #include "GPUReconstruction.h" +#include "GPUSettings.h" namespace o2::gpu { +template +inline T* GPUReconstruction::AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr& u) +{ + if (n == 0) { + u.reset(nullptr); + return nullptr; + } + T* retVal; + if (mInputControl.useExternal()) { + u.reset(nullptr); + mInputControl.checkCurrent(); + GPUProcessor::computePointerWithAlignment(mInputControl.ptrCurrent, retVal, n); + if ((size_t)((char*)mInputControl.ptrCurrent - (char*)mInputControl.ptrBase) > mInputControl.size) { + throw std::bad_alloc(); + } + } else { + u.reset(new T[n]); + retVal = u.get(); + if (GetProcessingSettings().registerStandaloneInputMemory) { + if (registerMemoryForGPU(u.get(), n * sizeof(T))) { + GPUError("Error registering memory for GPU: %p - %ld bytes\n", (void*)u.get(), (int64_t)(n * sizeof(T))); + throw std::bad_alloc(); + } + } + } + ptr = retVal; + return retVal; +} + template inline uint32_t GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type) { @@ -38,7 +68,7 @@ inline uint32_t GPUReconstruction::DumpData(FILE* fp, const T* const* entries, c fwrite(entries[i], sizeof(*entries[i]), num[i], fp); } } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Dumped %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); } return numTotal; @@ -72,7 +102,7 @@ inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, S* num, s numTotal += num[i]; } (void)r; - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Read %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); } return numTotal; @@ -112,7 +142,7 @@ inline std::unique_ptr GPUReconstruction::ReadFlatObjectFromFile(const char* r = fread((void*)retVal.get(), 1, size[0], fp); r = fread(buf, 1, size[1], fp); fclose(fp); - if 
(mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Read %ld bytes from %s", (int64_t)r, file); } retVal->clearInternalBufferPtr(); @@ -151,7 +181,7 @@ inline std::unique_ptr GPUReconstruction::ReadStructFromFile(const char* file std::unique_ptr newObj(new T); r = fread(newObj.get(), 1, size, fp); fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Read %ld bytes from %s", (int64_t)r, file); } return newObj; @@ -172,7 +202,7 @@ inline int32_t GPUReconstruction::ReadStructFromFile(const char* file, T* obj) } r = fread(obj, 1, size, fp); fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Read %ld bytes from %s", (int64_t)r, file); } return 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index b3f6c6ec817fd..2b16dfb32fe14 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -65,7 +65,7 @@ // GPU Host wrappers for kernel #define GPUCA_KRNL_HOST(x_class, ...) \ GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ - template <> class GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::backendInternal { \ + template <> class GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::backendInternal { \ public: \ template \ static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h deleted file mode 100644 index 7f500d471de1f..0000000000000 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionKernels.h -/// \author David Rohr - -#ifndef GPURECONSTRUCTIONKERNELS_H -#define GPURECONSTRUCTIONKERNELS_H - -#include "GPUReconstruction.h" - -namespace o2::gpu -{ - -namespace gpu_reconstruction_kernels -{ - -template -struct classArgument { - using t = T; - static constexpr int32_t i = I; -}; - -struct krnlExec { - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} - uint32_t nBlocks; - uint32_t nThreads; - int32_t stream; - GPUReconstruction::krnlDeviceType device; - GPUDataTypes::RecoStep step; -}; -struct krnlRunRange { - constexpr krnlRunRange() = default; - constexpr krnlRunRange(uint32_t v) : index(v) {} - uint32_t index = 0; -}; -struct krnlEvent { - constexpr krnlEvent(deviceEvent* e = nullptr, deviceEvent* el = nullptr, int32_t n = 1) : ev(e), evList(el), nEvents(n) {} - deviceEvent* ev; - deviceEvent* evList; - int32_t nEvents; -}; - -struct krnlProperties { - krnlProperties(int32_t t = 0, int32_t b = 1, int32_t b2 = 0) : nThreads(t), minBlocks(b), forceBlocks(b2) {} - uint32_t nThreads; - uint32_t minBlocks; - uint32_t forceBlocks; - uint32_t 
total() { return forceBlocks ? forceBlocks : (nThreads * minBlocks); } -}; - -struct krnlSetup { - krnlSetup(const krnlExec& xx, const krnlRunRange& yy = {0}, const krnlEvent& zz = {nullptr, nullptr, 0}) : x(xx), y(yy), z(zz) {} - krnlExec x; - krnlRunRange y; - krnlEvent z; -}; - -struct krnlSetupTime : public krnlSetup { - double& t; -}; - -template -struct krnlSetupArgs : public gpu_reconstruction_kernels::classArgument { - krnlSetupArgs(const krnlExec& xx, const krnlRunRange& yy, const krnlEvent& zz, double& tt, const Args&... args) : s{{xx, yy, zz}, tt}, v(args...) {} - const krnlSetupTime s; - std::tuple sizeof(void*)), const Args&, const Args>::type...> v; -}; - -} // namespace gpu_reconstruction_kernels - -template -class GPUReconstructionKernels : public T -{ - public: - GPUReconstructionKernels(const GPUSettingsDeviceBackend& cfg) : T(cfg) {} - - protected: - using deviceEvent = gpu_reconstruction_kernels::deviceEvent; - using krnlExec = gpu_reconstruction_kernels::krnlExec; - using krnlRunRange = gpu_reconstruction_kernels::krnlRunRange; - using krnlEvent = gpu_reconstruction_kernels::krnlEvent; - using krnlSetup = gpu_reconstruction_kernels::krnlSetup; - using krnlSetupTime = gpu_reconstruction_kernels::krnlSetupTime; - template - using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; - - virtual void runKernelImpl(const int num, const void* args) - { - switch (num) { // clang-format off -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ - case x_num: { \ - const auto& args2 = *(const krnlSetupArgs*)args; \ - T::template runKernelBackend(args2); \ - break; \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL - } // clang-format on - } -}; - -} // namespace o2::gpu - -#endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index aa01d26446b56..89517c612403b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ 
b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -24,6 +24,7 @@ #include "GPUReconstruction.h" #include "GPUReconstructionAvailableBackends.h" +#include "GPUSettings.h" #include "utils/qlibload.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index d02309f66c762..a511102a492ef 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -15,6 +15,9 @@ #include "GPUReconstructionProcessing.h" #include "GPUReconstructionThreading.h" #include "GPUDefParametersLoad.inc" +#include "GPUReconstructionKernelIncludes.h" +#include "GPUSettings.h" +#include "GPULogging.h" using namespace o2::gpu; @@ -41,7 +44,7 @@ GPUReconstructionProcessing::~GPUReconstructionProcessing() int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores) { int32_t nThreads = 0; - if (mProcessingSettings.inKernelParallel == 2 && mNActiveThreadsOuterLoop) { + if (GetProcessingSettings().inKernelParallel == 2 && mNActiveThreadsOuterLoop) { if (splitCores) { nThreads = mMaxHostThreads / mNActiveThreadsOuterLoop; nThreads += (uint32_t)getHostThreadIndex() < mMaxHostThreads % mNActiveThreadsOuterLoop; @@ -50,7 +53,7 @@ int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores) } nThreads = std::max(1, nThreads); } else { - nThreads = mProcessingSettings.inKernelParallel ? mMaxHostThreads : 1; + nThreads = GetProcessingSettings().inKernelParallel ? mMaxHostThreads : 1; } return nThreads; } @@ -59,7 +62,7 @@ void GPUReconstructionProcessing::SetNActiveThreads(int32_t n) { mActiveHostKernelThreads = std::max(1, n < 0 ? 
mMaxHostThreads : std::min(n, mMaxHostThreads)); mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Set number of active parallel kernels threads on host to %d (%d requested)", mActiveHostKernelThreads, n); } } @@ -80,12 +83,12 @@ void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThr uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) { - if (condition && mProcessingSettings.inKernelParallel != 1) { - mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; + if (condition && GetProcessingSettings().inKernelParallel != 1) { + mNActiveThreadsOuterLoop = GetProcessingSettings().inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; } else { mNActiveThreadsOuterLoop = 1; } - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); } return mNActiveThreadsOuterLoop; @@ -132,9 +135,9 @@ uint32_t GPUReconstructionProcessing::getNextTimerId() return id.fetch_add(1); } -std::unique_ptr GPUReconstructionProcessing::GetThreadContext() +std::unique_ptr GPUReconstructionProcessing::GetThreadContext() { - return std::make_unique(); + return std::make_unique(); } gpu_reconstruction_kernels::threadContext::threadContext() = default; diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 4ce8bc1b42743..9e611e57148c6 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -16,7 +16,6 @@ #define GPURECONSTRUCTIONPROCESSING_H #include "GPUReconstruction.h" -#include "GPUReconstructionKernelIncludes.h" #include "utils/timer.h" #include @@ -32,7 +31,7 @@ namespace 
o2::gpu struct GPUDefParameters; -namespace gpu_reconstruction_kernels +namespace gpu_reconstruction_kernels // TODO: Get rid of this namespace { struct deviceEvent { constexpr deviceEvent() = default; @@ -72,6 +71,86 @@ class GPUReconstructionProcessing : public GPUReconstruction public: ~GPUReconstructionProcessing() override; + using deviceEvent = gpu_reconstruction_kernels::deviceEvent; + using threadContext = gpu_reconstruction_kernels::threadContext; + + struct RecoStepTimerMeta { + HighResTimer timerToGPU; + HighResTimer timerToHost; + HighResTimer timerTotal; + double timerCPU = 0.; + size_t bytesToGPU = 0; + size_t bytesToHost = 0; + uint32_t countToGPU = 0; + uint32_t countToHost = 0; + }; + + template + struct kernelInterfaceArguments { + using t = T; + static constexpr int32_t i = I; + }; + + struct krnlExec { + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} + uint32_t nBlocks; + uint32_t nThreads; + int32_t stream; + GPUReconstruction::krnlDeviceType device; + GPUDataTypes::RecoStep step; + }; + struct krnlRunRange { + constexpr krnlRunRange() = default; + constexpr krnlRunRange(uint32_t v) : index(v) {} + uint32_t index = 0; + }; + struct krnlEvent { + constexpr krnlEvent(deviceEvent* e = nullptr, deviceEvent* el = nullptr, int32_t n = 1) : ev(e), evList(el), nEvents(n) {} + deviceEvent* ev; + deviceEvent* evList; + int32_t nEvents; + }; + + struct krnlProperties { + krnlProperties(int32_t t = 0, int32_t b = 1, int32_t 
b2 = 0) : nThreads(t), minBlocks(b), forceBlocks(b2) {} + uint32_t nThreads; + uint32_t minBlocks; + uint32_t forceBlocks; + uint32_t total() { return forceBlocks ? forceBlocks : (nThreads * minBlocks); } + }; + + struct krnlSetup { + krnlSetup(const krnlExec& xx, const krnlRunRange& yy = {0}, const krnlEvent& zz = {nullptr, nullptr, 0}) : x(xx), y(yy), z(zz) {} + krnlExec x; + krnlRunRange y; + krnlEvent z; + }; + + struct krnlSetupTime : public krnlSetup { + double& t; + }; + + template + struct krnlSetupArgs : public kernelInterfaceArguments { + const krnlSetupTime s; + std::tuple sizeof(void*)), const Args&, const Args>::type...> v; + }; + + template + class KernelInterface : public S + { + public: + template + KernelInterface(const Args&... args) : S(args...) + { + } + + protected: + virtual void runKernelVirtual(const int num, const void* args); + }; + // Threading int32_t getNKernelHostThreads(bool splitCores); uint32_t getNActiveThreadsOuterLoop() const { return mNActiveThreadsOuterLoop; } @@ -94,23 +173,12 @@ class GPUReconstructionProcessing : public GPUReconstruction template void AddGPUEvents(T*& events); - virtual std::unique_ptr GetThreadContext() override; + virtual std::unique_ptr GetThreadContext() override; - struct RecoStepTimerMeta { - HighResTimer timerToGPU; - HighResTimer timerToHost; - HighResTimer timerTotal; - double timerCPU = 0.; - size_t bytesToGPU = 0; - size_t bytesToHost = 0; - uint32_t countToGPU = 0; - uint32_t countToHost = 0; - }; const GPUDefParameters& getGPUParameters(bool doGPU) const override { return *(doGPU ? 
mParDevice : mParCPU); } protected: GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg); - using deviceEvent = gpu_reconstruction_kernels::deviceEvent; static const std::vector mKernelNames; @@ -181,7 +249,7 @@ HighResTimer& GPUReconstructionProcessing::getTimer(const char* name, int32_t nu static int32_t id = getNextTimerId(); timerMeta* timer = getTimerById(id); if (timer == nullptr) { - int32_t max = std::max({mMaxHostThreads, mProcessingSettings.nStreams}); + int32_t max = std::max({mMaxHostThreads, GPUCA_MAX_STREAMS}); timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); } if (num == -1) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc b/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc new file mode 100644 index 0000000000000..49d02515372b8 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc @@ -0,0 +1,41 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUReconstructionProcessingKernels.h +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONPROCESSINGKERNELS_H +#define GPURECONSTRUCTIONPROCESSINGKERNELS_H + +#include "GPUReconstructionProcessing.h" +#include "GPUReconstructionKernelIncludes.h" + +namespace o2::gpu +{ + +template +void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const int num, const void* args) +{ + switch (num) { // clang-format off +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ + case x_num: { \ + const auto& args2 = *(const krnlSetupArgs*)args; \ + ((T*)this)->template runKernelBackend(args2); \ + break; \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL + } // clang-format on +} + +} // namespace o2::gpu + +#endif // GPURECONSTRUCTIONPROCESSINGKERNELS_H diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx index 4693a1eff24f2..b25b93e957b15 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx @@ -14,6 +14,8 @@ #include "GPUReconstructionTimeframe.h" #include "GPUReconstruction.h" +#include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "display/GPUDisplayInterface.h" #include "GPUQA.h" #include "AliHLTTPCClusterMCData.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index fe2906caace80..970b331ea99fb 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -24,6 +24,7 @@ #include "GPUParamRTC.h" #include "GPUReconstructionCUDAHelpers.inc" #include "GPUDefParametersLoad.inc" +#include "GPUReconstructionProcessingKernels.inc" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" @@ -53,16 +54,23 @@ __global__ void dummyInitKernel(void*) {} #include 
"GPUReconstructionIncludesITS.h" -GPUReconstructionCUDABackend::GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase)) +GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface(cfg, sizeof(GPUReconstructionDeviceBase)) { if (mMaster == nullptr) { mInternals = new GPUReconstructionCUDAInternals; *mParDevice = o2::gpu::internal::GPUDefParametersLoad(); } - mDeviceBackendSettings.deviceType = DeviceType::CUDA; + mDeviceBackendSettings->deviceType = DeviceType::CUDA; +#ifndef __HIPCC__ // CUDA + mRtcSrcExtension = ".cu"; + mRtcBinExtension = ".fatbin"; +#else // HIP + mRtcSrcExtension = ".hip"; + mRtcBinExtension = ".o"; +#endif } -GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() +GPUReconstructionCUDA::~GPUReconstructionCUDA() { Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA if (mMaster == nullptr) { @@ -71,23 +79,11 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const +int32_t GPUReconstructionCUDA::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return internal::GPUReconstructionCUDAChkErr(error, file, line); } -GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) -{ -#ifndef __HIPCC__ // CUDA - mRtcSrcExtension = ".cu"; - mRtcBinExtension = ".fatbin"; -#else // HIP - mRtcSrcExtension = ".hip"; - mRtcBinExtension = ".o"; -#endif -} -GPUReconstructionCUDA::~GPUReconstructionCUDA() = default; - GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionCUDA(cfg); } void 
GPUReconstructionCUDA::GetITSTraits(std::unique_ptr* trackerTraits, std::unique_ptr* vertexerTraits, std::unique_ptr* timeFrame) @@ -109,7 +105,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() constexpr int32_t reqVerMaj = 2; constexpr int32_t reqVerMin = 0; #endif - if (mProcessingSettings.rtc.enable && mProcessingSettings.rtctech.runTest == 2) { + if (GetProcessingSettings().rtc.enable && GetProcessingSettings().rtctech.runTest == 2) { mWarpSize = GPUCA_WARP_SIZE; genAndLoadRTC(); exit(0); @@ -123,14 +119,14 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUError("Error getting CUDA Device Count"); return (1); } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Available CUDA devices:"); } std::vector devicesOK(count, false); std::vector devMemory(count, 0); bool contextCreated = false; for (int32_t i = 0; i < count; i++) { - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Examining device %d", i); } size_t free, total; @@ -139,14 +135,14 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() #else // HIP if (GPUChkErrI(hipSetDevice(i))) { #endif - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); } continue; } contextCreated = true; if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! 
Skipping it.", i); } GPUChkErr(cudaDeviceReset()); @@ -156,13 +152,13 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErr(cudaDeviceReset()); contextCreated = false; } - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } if (GPUChkErrI(cudaGetDeviceProperties(&deviceProp, i))) { continue; } - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Obtained device properties for device %d", i); } int32_t deviceOK = true; @@ -179,7 +175,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } deviceSpeed = (double)deviceProp.multiProcessorCount * (double)deviceProp.clockRate * (double)deviceProp.warpSize * (double)free * (double)deviceProp.major * (double)deviceProp.major; - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUImportant("Device %s%2d: %s (Rev: %d.%d - Mem Avail %lu / %lu)%s %s", deviceOK ? " " : "[", i, deviceProp.name, deviceProp.major, deviceProp.minor, free, (size_t)deviceProp.totalGlobalMem, deviceOK ? " " : " ]", deviceOK ? 
"" : deviceFailure); } if (!deviceOK) { @@ -191,7 +187,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() bestDevice = i; bestDeviceSpeed = deviceSpeed; } else { - if (mProcessingSettings.debugLevel >= 2 && mProcessingSettings.deviceNum < 0) { + if (GetProcessingSettings().debugLevel >= 2 && GetProcessingSettings().deviceNum < 0) { GPUInfo("Skipping: Speed %f < %f\n", deviceSpeed, bestDeviceSpeed); } } @@ -204,15 +200,15 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUImportant("Requiring Revision %d.%d, Mem: %lu", reqVerMaj, reqVerMin, std::max(mDeviceMemorySize, REQUIRE_MIN_MEMORY)); #endif noDevice = true; - } else if (mProcessingSettings.deviceNum > -1) { - if (mProcessingSettings.deviceNum >= (signed)count) { - GPUError("Requested device ID %d does not exist", mProcessingSettings.deviceNum); + } else if (GetProcessingSettings().deviceNum > -1) { + if (GetProcessingSettings().deviceNum >= (signed)count) { + GPUError("Requested device ID %d does not exist", GetProcessingSettings().deviceNum); noDevice = true; - } else if (!devicesOK[mProcessingSettings.deviceNum]) { - GPUError("Unsupported device requested (%d)", mProcessingSettings.deviceNum); + } else if (!devicesOK[GetProcessingSettings().deviceNum]) { + GPUError("Unsupported device requested (%d)", GetProcessingSettings().deviceNum); noDevice = true; } else { - bestDevice = mProcessingSettings.deviceNum; + bestDevice = GetProcessingSettings().deviceNum; } } if (noDevice) { @@ -225,7 +221,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Using CUDA Device %s with Properties:", deviceProp.name); GPUInfo("\ttotalGlobalMem = %ld", (uint64_t)deviceProp.totalGlobalMem); GPUInfo("\tsharedMemPerBlock = %ld", (uint64_t)deviceProp.sharedMemPerBlock); @@ -244,7 +240,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() 
GPUInfo("\ttextureAlignment = %ld", (uint64_t)deviceProp.textureAlignment); GPUInfo(" "); } - if (deviceProp.warpSize != GPUCA_WARP_SIZE && !mProcessingSettings.rtc.enable) { + if (deviceProp.warpSize != GPUCA_WARP_SIZE && !GetProcessingSettings().rtc.enable) { throw std::runtime_error("Invalid warp size on GPU"); } mWarpSize = deviceProp.warpSize; @@ -280,7 +276,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErrI(cudaDeviceReset()); return (1); } - if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, GetProcessingSettings().deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { GPUError("Error setting CUDA stack size"); GPUChkErrI(cudaDeviceReset()); return (1); @@ -302,7 +298,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mDeviceMemorySize = mDeviceMemorySize * 2 / 3; // Leave 1/3 of GPU memory for event display } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Allocating memory on GPU"); } if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUChkErrI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { @@ -312,7 +308,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErrI(cudaDeviceReset()); return (1); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Allocating memory on Host"); } if (GPUChkErrI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { @@ -320,7 +316,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErrI(cudaDeviceReset()); return (1); } - if (mProcessingSettings.debugLevel >= 1) { + if (GetProcessingSettings().debugLevel >= 1) { GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, 
mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase); memset(mHostMemoryBase, 0xDD, mHostMemorySize); if (GPUChkErrI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { @@ -344,7 +340,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mBlockCount), dim3(256), 0, 0, mDeviceMemoryBase); #endif - if (mProcessingSettings.rtc.enable) { + if (GetProcessingSettings().rtc.enable) { genAndLoadRTC(); } #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 @@ -419,17 +415,17 @@ void GPUReconstructionCUDA::genAndLoadRTC() throw std::runtime_error("Runtime compilation failed"); } for (uint32_t i = 0; i < nCompile; i++) { - if (mProcessingSettings.rtctech.runTest != 2) { + if (GetProcessingSettings().rtctech.runTest != 2) { mInternals->kernelModules.emplace_back(std::make_unique()); GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); } - if (mProcessingSettings.rtctech.runTest == 2) { + if (GetProcessingSettings().rtctech.runTest == 2) { return; } - loadKernelModules(mProcessingSettings.rtc.compilePerKernel); + loadKernelModules(GetProcessingSettings().rtc.compilePerKernel); } int32_t GPUReconstructionCUDA::ExitDevice_Runtime() @@ -472,7 +468,7 @@ int32_t GPUReconstructionCUDA::ExitDevice_Runtime() size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents) { - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { stream = -1; } if (stream == -1) { @@ -490,7 +486,7 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, if (ev) { GPUChkErr(cudaEventRecord(ev->get(), 
mInternals->Streams[stream == -1 ? 0 : stream])); } - if (mProcessingSettings.serializeGPU & 2) { + if (GetProcessingSettings().serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); } return size; @@ -512,7 +508,7 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s if (ev && stream != -1) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } - if (mProcessingSettings.serializeGPU & 2) { + if (GetProcessingSettings().serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); } return size; @@ -521,7 +517,7 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {} void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } -std::unique_ptr GPUReconstructionCUDA::GetThreadContext() +std::unique_ptr GPUReconstructionCUDA::GetThreadContext() { GPUChkErr(cudaSetDevice(mDeviceId)); return GPUReconstructionProcessing::GetThreadContext(); @@ -565,14 +561,14 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool GPUError("CUDA Error %s while running (%s) (Stream %d)", cudaGetErrorString(cuErr), state, stream); return (1); } - if (!force && mProcessingSettings.debugLevel <= 0) { + if (!force && GetProcessingSettings().debugLevel <= 0) { return (0); } if (GPUChkErrI(stream == -1 ? 
cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { GPUError("CUDA Error while synchronizing (%s) (Stream %d)", state, stream); return (1); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("GPU Sync Done"); } return (0); @@ -580,7 +576,7 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool int32_t GPUReconstructionCUDA::registerMemoryForGPU_internal(const void* ptr, size_t size) { - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Registering %zu bytes of memory for GPU", size); } return GPUChkErrI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); @@ -591,7 +587,7 @@ int32_t GPUReconstructionCUDA::unregisterMemoryForGPU_internal(const void* ptr) return GPUChkErrI(cudaHostUnregister((void*)ptr)); } -void GPUReconstructionCUDABackend::PrintKernelOccupancies() +void GPUReconstructionCUDA::PrintKernelOccupancies() { int32_t maxBlocks = 0, threads = 0, suggestedBlocks = 0, nRegs = 0, sMem = 0; GPUChkErr(cudaSetDevice(mDeviceId)); @@ -612,7 +608,7 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) GPUFatal("kernel numbers out of sync"); \ } \ mInternals->kernelFunctions.emplace_back(new CUfunction); \ - if (mProcessingSettings.debugLevel >= 3) { \ + if (GetProcessingSettings().debugLevel >= 3) { \ GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ } \ GPUChkErr(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? 
j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ @@ -680,8 +676,3 @@ void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options #endif // ORT_ROCM_BUILD } #endif // __HIPCC__ - -namespace o2::gpu -{ -template class GPUReconstructionKernels; -} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 2fc4d14bba491..ed75100dfe351 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -16,6 +16,7 @@ #define GPURECONSTRUCTIONCUDA_H #include "GPUReconstructionDeviceBase.h" +#include "GPUCommonAlgorithm.h" #include #include @@ -34,38 +35,31 @@ namespace o2::gpu { struct GPUReconstructionCUDAInternals; -class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase +class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterface { public: - ~GPUReconstructionCUDABackend() override; - - protected: - GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); + GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg); + ~GPUReconstructionCUDA() override; void PrintKernelOccupancies() override; virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; template void runKernelBackend(const krnlSetupArgs& args); - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); template friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); + + protected: GPUReconstructionCUDAInternals* mInternals; -}; -class GPUReconstructionCUDA : public GPUReconstructionKernels -{ - public: - ~GPUReconstructionCUDA() override; - GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg); + template + void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); - protected: int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; - std::unique_ptr GetThreadContext() override; + std::unique_ptr GetThreadContext() override; void SynchronizeGPU() override; int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override; void SynchronizeStream(int32_t stream) override; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index abcd47ca01c90..5706f32e73e96 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -37,8 +37,8 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_no_fast_math); int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + - std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + - GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + + GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; } @@ -53,12 +53,12 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) kernelsall += kernels[i] + "\n"; } - std::string baseCommand = (mProcessingSettings.rtctech.prependCommand != "" ? (mProcessingSettings.rtctech.prependCommand + " ") : ""); + std::string baseCommand = (GetProcessingSettings().rtctech.prependCommand != "" ? (GetProcessingSettings().rtctech.prependCommand + " ") : ""); baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? 
std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len)); - baseCommand += std::string(" ") + (mProcessingSettings.rtctech.overrideArchitecture != "" ? mProcessingSettings.rtctech.overrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); + baseCommand += std::string(" ") + (GetProcessingSettings().rtctech.overrideArchitecture != "" ? GetProcessingSettings().rtctech.overrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); - if (mProcessingSettings.rtctech.loadLaunchBoundsFromFile.size()) { - FILE* fp = fopen(mProcessingSettings.rtctech.loadLaunchBoundsFromFile.c_str(), "rb"); + if (GetProcessingSettings().rtctech.loadLaunchBoundsFromFile.size()) { + FILE* fp = fopen(GetProcessingSettings().rtctech.loadLaunchBoundsFromFile.c_str(), "rb"); if (fp == nullptr) { throw std::runtime_error("Cannot open launch bounds parameter module file"); } @@ -75,12 +75,12 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true) + "#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n"; - if (mProcessingSettings.rtctech.printLaunchBounds || mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().rtctech.printLaunchBounds || GetProcessingSettings().debugLevel >= 3) { GPUInfo("RTC Launch Bounds:\n%s", launchBounds.c_str()); } char shasource[21], shaparam[21], shacmd[21], shakernels[21], shabounds[21]; - if (mProcessingSettings.rtc.cacheOutput) { + if (GetProcessingSettings().rtc.cacheOutput) { o2::framework::internal::SHA1(shasource, _binary_GPUReconstructionCUDArtc_src_start, _binary_GPUReconstructionCUDArtc_src_len); o2::framework::internal::SHA1(shaparam, rtcparam.c_str(), 
rtcparam.size()); o2::framework::internal::SHA1(shacmd, baseCommand.c_str(), baseCommand.size()); @@ -88,16 +88,16 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) o2::framework::internal::SHA1(shabounds, launchBounds.c_str(), launchBounds.size()); } - nCompile = mProcessingSettings.rtc.compilePerKernel ? kernels.size() : 1; + nCompile = GetProcessingSettings().rtc.compilePerKernel ? kernels.size() : 1; bool cacheLoaded = false; int32_t fd = 0; - if (mProcessingSettings.rtc.cacheOutput) { - if (mProcessingSettings.rtctech.cacheFolder != ".") { - std::filesystem::create_directories(mProcessingSettings.rtctech.cacheFolder); + if (GetProcessingSettings().rtc.cacheOutput) { + if (GetProcessingSettings().rtctech.cacheFolder != ".") { + std::filesystem::create_directories(GetProcessingSettings().rtctech.cacheFolder); } - if (mProcessingSettings.rtctech.cacheMutex) { + if (GetProcessingSettings().rtctech.cacheMutex) { mode_t mask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - fd = open((mProcessingSettings.rtctech.cacheFolder + "/cache.lock").c_str(), O_RDWR | O_CREAT | O_CLOEXEC, mask); + fd = open((GetProcessingSettings().rtctech.cacheFolder + "/cache.lock").c_str(), O_RDWR | O_CREAT | O_CLOEXEC, mask); if (fd == -1) { throw std::runtime_error("Error opening rtc cache mutex lock file"); } @@ -107,7 +107,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } } - FILE* fp = fopen((mProcessingSettings.rtctech.cacheFolder + "/rtc.cuda.cache").c_str(), "rb"); + FILE* fp = fopen((GetProcessingSettings().rtctech.cacheFolder + "/rtc.cuda.cache").c_str(), "rb"); char sharead[20]; if (fp) { size_t len; @@ -116,7 +116,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) if (fread(sharead, 1, 20, fp) != 20) { throw std::runtime_error("Cache file corrupt"); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { char 
shaprint1[41], shaprint2[41]; for (uint32_t i = 0; i < 20; i++) { sprintf(shaprint1 + 2 * i, "%02X ", shacmp[i]); @@ -124,7 +124,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } GPUInfo("SHA for %s: expected %s, read %s", name, shaprint1, shaprint2); } - if (!mProcessingSettings.rtctech.ignoreCacheValid && memcmp(sharead, shacmp, 20)) { + if (!GetProcessingSettings().rtctech.ignoreCacheValid && memcmp(sharead, shacmp, 20)) { GPUInfo("Cache file content outdated (%s)", name); return 1; } @@ -142,7 +142,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) if (fread(&cachedSettings, sizeof(cachedSettings), 1, fp) != 1) { throw std::runtime_error("Cache file corrupt"); } - if (!mProcessingSettings.rtctech.ignoreCacheValid && !(cachedSettings == mProcessingSettings.rtc)) { + if (!GetProcessingSettings().rtctech.ignoreCacheValid && !(cachedSettings == GetProcessingSettings().rtc)) { GPUInfo("Cache file content outdated (rtc parameters)"); break; } @@ -172,13 +172,13 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } } if (!cacheLoaded) { - if (mProcessingSettings.debugLevel >= 0) { + if (GetProcessingSettings().debugLevel >= 0) { GPUInfo("Starting CUDA RTC Compilation"); } HighResTimer rtcTimer; rtcTimer.ResetStart(); tbb::parallel_for(0, nCompile, [&](auto i) { - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { printf("Compiling %s\n", (filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); } FILE* fp = fopen((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str(), "w+b"); @@ -187,10 +187,10 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } std::string kernel = "extern \"C\" {"; - kernel += mProcessingSettings.rtc.compilePerKernel ? kernels[i] : kernelsall; + kernel += GetProcessingSettings().rtc.compilePerKernel ? 
kernels[i] : kernelsall; kernel += "}"; - bool deterministic = mProcessingSettings.rtc.deterministic || (mProcessingSettings.rtc.compilePerKernel && o2::gpu::internal::noFastMathKernels.find(GetKernelName(i)) != o2::gpu::internal::noFastMathKernels.end()); + bool deterministic = GetProcessingSettings().rtc.deterministic || (GetProcessingSettings().rtc.compilePerKernel && o2::gpu::internal::noFastMathKernels.find(GetKernelName(i)) != o2::gpu::internal::noFastMathKernels.end()); const std::string deterministicStr = std::string(deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n"); if (fwrite(deterministicStr.c_str(), 1, deterministicStr.size(), fp) != deterministicStr.size() || @@ -206,26 +206,26 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) command += std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len); } command += " -c " + filename + "_" + std::to_string(i) + mRtcSrcExtension + " -o " + filename + "_" + std::to_string(i) + mRtcBinExtension; - if (mProcessingSettings.debugLevel < 0) { + if (GetProcessingSettings().debugLevel < 0) { command += " &> /dev/null"; - } else if (mProcessingSettings.debugLevel < 2) { + } else if (GetProcessingSettings().debugLevel < 2) { command += " > /dev/null"; } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { printf("Running command %s\n", command.c_str()); } if (system(command.c_str())) { - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { printf("Source code file: %s", filename.c_str()); } throw std::runtime_error("Error during CUDA compilation"); } // clang-format off }, tbb::simple_partitioner()); // clang-format on - if (mProcessingSettings.debugLevel >= 0) { + if (GetProcessingSettings().debugLevel >= 0) { GPUInfo("RTC Compilation 
finished (%f seconds)", rtcTimer.GetCurrentElapsedTime()); } - if (mProcessingSettings.rtc.cacheOutput) { - FILE* fp = fopen((mProcessingSettings.rtctech.cacheFolder + "/rtc.cuda.cache").c_str(), "w+b"); + if (GetProcessingSettings().rtc.cacheOutput) { + FILE* fp = fopen((GetProcessingSettings().rtctech.cacheFolder + "/rtc.cuda.cache").c_str(), "w+b"); if (fp == nullptr) { throw std::runtime_error("Cannot open cache file for writing"); } @@ -236,7 +236,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) fwrite(shacmd, 1, 20, fp) != 20 || fwrite(shakernels, 1, 20, fp) != 20 || fwrite(shabounds, 1, 20, fp) != 20 || - fwrite(&mProcessingSettings.rtc, sizeof(mProcessingSettings.rtc), 1, fp) != 1) { + fwrite(&GetProcessingSettings().rtc, sizeof(GetProcessingSettings().rtc), 1, fp) != 1) { throw std::runtime_error("Error writing cache file"); } @@ -263,7 +263,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) fclose(fp); } } - if (mProcessingSettings.rtc.cacheOutput && mProcessingSettings.rtctech.cacheMutex) { + if (GetProcessingSettings().rtc.cacheOutput && GetProcessingSettings().rtctech.cacheMutex) { if (lockf(fd, F_ULOCK, 0)) { throw std::runtime_error("Error unlocking RTC cache mutex file"); } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index f3fc21243ef0e..0813c9d22ea09 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -44,7 +44,7 @@ struct GPUReconstructionCUDAInternals { class GPUDebugTiming { public: - GPUDebugTiming(bool d, gpu_reconstruction_kernels::deviceEvent* t, cudaStream_t* s, const gpu_reconstruction_kernels::krnlSetupTime& x, GPUReconstructionCUDABackend* r) : mDeviceTimers(t), mStreams(s), mXYZ(x), mRec(r), mDo(d) + GPUDebugTiming(bool d, GPUReconstructionProcessing::deviceEvent* t, cudaStream_t* s, const 
GPUReconstructionProcessing::krnlSetupTime& x, GPUReconstructionCUDA* r) : mDeviceTimers(t), mStreams(s), mXYZ(x), mRec(r), mDo(d) { if (mDo) { if (mDeviceTimers) { @@ -71,10 +71,10 @@ class GPUDebugTiming } private: - gpu_reconstruction_kernels::deviceEvent* mDeviceTimers; + GPUReconstructionProcessing::deviceEvent* mDeviceTimers; cudaStream_t* mStreams; - const gpu_reconstruction_kernels::krnlSetupTime& mXYZ; - GPUReconstructionCUDABackend* mRec; + const GPUReconstructionProcessing::krnlSetupTime& mXYZ; + GPUReconstructionCUDA* mRec; HighResTimer mTimer; bool mDo; }; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index d53f7cbd81ca9..758ab1b0e36c3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -27,16 +27,16 @@ __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUC #endif template <> -inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); } template -inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args) { #if !defined(GPUCA_KERNEL_COMPILE_MODE) || GPUCA_KERNEL_COMPILE_MODE != 1 - if (!mProcessingSettings.rtc.enable) { + if (!GetProcessingSettings().rtc.enable) { backendInternal::runKernelBackendMacro(_xyz, this, args...); } else #endif @@ -56,7 +56,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet } template -void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args) { auto& x = args.s.x; auto& z = args.s.z; @@ -66,7 +66,7 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); + GPUDebugTiming timer(GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); std::apply([this, &args](auto&... vals) { this->runKernelBackendInternal(args.s, vals...); }, args.v); } GPUChkErr(cudaGetLastError()); @@ -79,7 +79,7 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args); #else // ---------- COMPILE_MODE = onefile | rdc ---------- #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc @@ -87,7 +87,7 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); + template void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args); #ifndef __HIPCC__ // CUDA version #define GPUCA_KRNL_CALL(x_class, ...) 
\ diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e276f83413bbc..28c809dd4a09a 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUReconstructionOCLIncludesHost.h" +#include "GPUReconstructionProcessingKernels.inc" #include "GPUDefParametersLoad.inc" #include @@ -33,16 +34,16 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv); GPUReconstruction* GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL(cfg); } -GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase)) +GPUReconstructionOCL::GPUReconstructionOCL(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface(cfg, sizeof(GPUReconstructionDeviceBase)) { if (mMaster == nullptr) { mInternals = new GPUReconstructionOCLInternals; *mParDevice = o2::gpu::internal::GPUDefParametersLoad(); } - mDeviceBackendSettings.deviceType = DeviceType::OCL; + mDeviceBackendSettings->deviceType = DeviceType::OCL; } -GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() +GPUReconstructionOCL::~GPUReconstructionOCL() { Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit if (mMaster == nullptr) { @@ -51,7 +52,7 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); -int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const +int32_t GPUReconstructionOCL::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error != CL_SUCCESS) { @@ -60,7 +61,7 @@ int32_t 
GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, cons return error != CL_SUCCESS; } -int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() +int32_t GPUReconstructionOCL::InitDevice_Runtime() { if (mMaster == nullptr) { cl_int ocl_error; @@ -71,7 +72,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() if (num_platforms == 0) { GPUErrorReturn("No OpenCL Platform found"); } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("%d OpenCL Platforms found", num_platforms); } @@ -118,17 +119,17 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1; for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) { - if (mProcessingSettings.oclPlatformNum >= 0) { - if (mProcessingSettings.oclPlatformNum >= (int32_t)num_platforms) { + if (GetProcessingSettings().oclPlatformNum >= 0) { + if (GetProcessingSettings().oclPlatformNum >= (int32_t)num_platforms) { GPUErrorReturn("Invalid platform specified"); } - iPlatform = mProcessingSettings.oclPlatformNum; + iPlatform = GetProcessingSettings().oclPlatformNum; } std::string platformUsageInfo; bool platformCompatible = false; queryPlatform(platforms[iPlatform]); if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount) != CL_SUCCESS) { - if (mProcessingSettings.oclPlatformNum >= 0) { + if (GetProcessingSettings().oclPlatformNum >= 0) { GPUErrorReturn("No device in requested platform or error obtaining device count"); } platformUsageInfo += " - no devices"; @@ -139,32 +140,32 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } } - if (mProcessingSettings.oclPlatformNum >= 0 || mProcessingSettings.debugLevel >= 2) { - GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", mProcessingSettings.oclPlatformNum >= 0 ? 
"Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", mProcessingSettings.debugLevel >= 2 ? platformUsageInfo.c_str() : ""); + if (GetProcessingSettings().oclPlatformNum >= 0 || GetProcessingSettings().debugLevel >= 2) { + GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", GetProcessingSettings().oclPlatformNum >= 0 ? "Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", GetProcessingSettings().debugLevel >= 2 ? platformUsageInfo.c_str() : ""); } - if (platformCompatible || mProcessingSettings.oclPlatformNum >= 0 || (mProcessingSettings.oclPlatformNum == -2 && deviceCount)) { + if (platformCompatible || GetProcessingSettings().oclPlatformNum >= 0 || (GetProcessingSettings().oclPlatformNum == -2 && deviceCount)) { if (deviceCount > devices.size()) { devices.resize(deviceCount); } if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(), nullptr) != CL_SUCCESS) { - if (mProcessingSettings.oclPlatformNum >= 0) { + if (GetProcessingSettings().oclPlatformNum >= 0) { GPUErrorReturn("Error getting OpenCL devices"); } continue; } for (uint32_t i = 0; i < deviceCount; i++) { - if (mProcessingSettings.deviceNum >= 0) { - if (mProcessingSettings.deviceNum >= (signed)deviceCount) { - GPUErrorReturn("Requested device ID %d does not exist", mProcessingSettings.deviceNum); + if (GetProcessingSettings().deviceNum >= 0) { + if (GetProcessingSettings().deviceNum >= (signed)deviceCount) { + GPUErrorReturn("Requested device ID %d does not exist", GetProcessingSettings().deviceNum); } - i = mProcessingSettings.deviceNum; + i = GetProcessingSettings().deviceNum; } bool deviceOK = true; queryDevice(devices[i]); std::string deviceFailure; - if (mProcessingSettings.gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) 
|| !(device_type & CL_DEVICE_TYPE_GPU))) { + if (GetProcessingSettings().gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) { deviceOK = false; deviceFailure += " - No GPU device"; } @@ -193,12 +194,12 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (double)device_shaders; - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo(" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ? " " : "[", i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ? " " : " ]", deviceOK ? "" : deviceFailure.c_str()); } if (!deviceOK) { - if (mProcessingSettings.deviceNum >= 0) { - GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, mProcessingSettings.deviceNum); + if (GetProcessingSettings().deviceNum >= 0) { + GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, GetProcessingSettings().deviceNum); break; } continue; @@ -209,12 +210,12 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() bestDeviceSpeed = deviceSpeed; mOclVersion = platform_version_f; } - if (mProcessingSettings.deviceNum >= 0) { + if (GetProcessingSettings().deviceNum >= 0) { break; } } } - if (mProcessingSettings.oclPlatformNum >= 0) { + if (GetProcessingSettings().oclPlatformNum >= 0) { break; } } @@ -238,7 +239,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() deviceVersion = query(clGetDeviceInfo, mInternals->device, CL_DEVICE_VERSION); int versionMajor, versionMinor; sscanf(deviceVersion.c_str(), "OpenCL %d.%d", &versionMajor, &versionMinor); - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), 
device_name.c_str()); GPUInfo("\tVersion = %s", deviceVersion); GPUInfo("\tFrequency = %d", (int32_t)device_freq); @@ -271,7 +272,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() return 1; } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("OpenCL program and kernels loaded successfully"); } @@ -289,21 +290,21 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } if (device_type & CL_DEVICE_TYPE_CPU) { - if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel >= 2) { GPUInfo("Disabling device timers for CPU device"); } - mProcessingSettings.deviceTimers = 0; + mProcessingSettings->deviceTimers = 0; } for (int32_t i = 0; i < mNStreams; i++) { #ifdef CL_VERSION_2_0 cl_queue_properties prop = 0; - if (versionMajor >= 2 && IsGPU() && mProcessingSettings.deviceTimers) { + if (versionMajor >= 2 && IsGPU() && GetProcessingSettings().deviceTimers) { prop |= CL_QUEUE_PROFILING_ENABLE; } mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error); - if (mProcessingSettings.deviceTimers && ocl_error == CL_INVALID_QUEUE_PROPERTIES) { + if (GetProcessingSettings().deviceTimers && ocl_error == CL_INVALID_QUEUE_PROPERTIES) { GPUError("GPU device timers not supported by OpenCL platform, disabling"); - mProcessingSettings.deviceTimers = 0; + mProcessingSettings->deviceTimers = 0; prop = 0; mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error); } @@ -351,7 +352,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUErrorReturn("Error obtaining device memory ptr"); } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Mapping hostmemory"); } mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], 
mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error); @@ -362,7 +363,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mDeviceMemoryBase = ((void**)mHostMemoryBase)[0]; mDeviceConstantMem = (GPUConstantMem*)((void**)mHostMemoryBase)[1]; - if (mProcessingSettings.debugLevel >= 1) { + if (GetProcessingSettings().debugLevel >= 1) { GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase); memset(mHostMemoryBase, 0xDD, mHostMemorySize); } @@ -386,7 +387,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() return (0); } -int32_t GPUReconstructionOCLBackend::ExitDevice_Runtime() +int32_t GPUReconstructionOCL::ExitDevice_Runtime() { // Uninitialize OPENCL SynchronizeGPU(); @@ -418,12 +419,12 @@ int32_t GPUReconstructionOCLBackend::ExitDevice_Runtime() return (0); } -size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents) +size_t GPUReconstructionOCL::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents) { if (evList == nullptr) { nEvents = 0; } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { stream = -1; } if (stream == -1) { @@ -440,33 +441,33 @@ size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t } else { GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 
0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); } - if (mProcessingSettings.serializeGPU & 2) { + if (GetProcessingSettings().serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); } return size; } -size_t GPUReconstructionOCLBackend::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev) +size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev) { if (stream == -1) { SynchronizeGPU(); } GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); - if (mProcessingSettings.serializeGPU & 2) { + if (GetProcessingSettings().serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); } return size; } -void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get())); } +void GPUReconstructionOCL::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get())); } -void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } +void GPUReconstructionOCL::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } -int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEvent event) +int32_t GPUReconstructionOCL::DoStuckProtection(int32_t stream, deviceEvent event) { - if (mProcessingSettings.stuckProtection) { + if (GetProcessingSettings().stuckProtection) { cl_int tmp = 0; - for (int32_t i = 0; i <= mProcessingSettings.stuckProtection / 50; i++) { + for (int32_t i = 0; i <= GetProcessingSettings().stuckProtection 
/ 50; i++) { usleep(50); clGetEventInfo(event.get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(tmp), &tmp, nullptr); if (tmp == CL_COMPLETE) { @@ -483,25 +484,25 @@ int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEve return 0; } -void GPUReconstructionOCLBackend::SynchronizeGPU() +void GPUReconstructionOCL::SynchronizeGPU() { for (int32_t i = 0; i < mNStreams; i++) { GPUChkErr(clFinish(mInternals->command_queue[i])); } } -void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); } +void GPUReconstructionOCL::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); } -void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList())); } +void GPUReconstructionOCL::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList())); } -void GPUReconstructionOCLBackend::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) +void GPUReconstructionOCL::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { if (nEvents) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); } } -bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEvents) +bool GPUReconstructionOCL::IsEventDone(deviceEvent* evList, int32_t nEvents) { cl_int eventdone; for (int32_t i = 0; i < nEvents; i++) { @@ -513,10 +514,10 @@ bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEven return true; } -int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, bool force) +int32_t GPUReconstructionOCL::GPUDebug(const char* state, int32_t stream, bool force) { // Wait for OPENCL-Kernel to finish and check for OPENCL errors afterwards, in case of debugmode - if (!force && 
mProcessingSettings.debugLevel <= 0) { + if (!force && GetProcessingSettings().debugLevel <= 0) { return (0); } for (int32_t i = 0; i < mNStreams; i++) { @@ -524,13 +525,13 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i); } } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("GPU Sync Done"); } return (0); } -int32_t GPUReconstructionOCLBackend::GetOCLPrograms() +int32_t GPUReconstructionOCL::GetOCLPrograms() { cl_int ocl_error; @@ -571,7 +572,7 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() return AddKernels(); } -const char* GPUReconstructionOCLBackend::convertErrorToString(int32_t errorcode) +const char* GPUReconstructionOCL::convertErrorToString(int32_t errorcode) { static const std::map error_map = { {CL_SUCCESS, "CL_SUCCESS"}, diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index abde42f01f073..091bc0409630d 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -27,14 +27,16 @@ namespace o2::gpu { struct GPUReconstructionOCLInternals; -class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase +class GPUReconstructionOCL : public GPUReconstructionProcessing::KernelInterface { public: - ~GPUReconstructionOCLBackend() override; + GPUReconstructionOCL(const GPUSettingsDeviceBackend& cfg); + ~GPUReconstructionOCL() override; - protected: - GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg); + template + void runKernelBackend(const krnlSetupArgs& args); + protected: int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; @@ -61,8 +63,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase GPUReconstructionOCLInternals* mInternals; float mOclVersion; - template - void 
runKernelBackend(const krnlSetupArgs& args); template S& getKernelObject(); @@ -78,7 +78,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t AddKernels(); }; -using GPUReconstructionOCL = GPUReconstructionKernels; } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index 0bb2f25093789..919791948d6c3 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -54,7 +54,7 @@ struct GPUReconstructionOCLInternals { } // namespace o2::gpu template -inline int64_t GPUReconstructionOCLBackend::OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters) +inline int64_t GPUReconstructionOCL::OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters) { int64_t retVal = clSetKernelArg(kernel, i, sizeof(T), &firstParameter); if (retVal) { @@ -67,12 +67,12 @@ inline int64_t GPUReconstructionOCLBackend::OCLsetKernelParameters_helper(cl_ker } template -inline int64_t GPUReconstructionOCLBackend::OCLsetKernelParameters(cl_kernel& kernel, const Args&... args) +inline int64_t GPUReconstructionOCL::OCLsetKernelParameters(cl_kernel& kernel, const Args&... args) { return OCLsetKernelParameters_helper(kernel, 0, args...); } -inline int64_t GPUReconstructionOCLBackend::clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait, cl_int nWaitEvents) +inline int64_t GPUReconstructionOCL::clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait, cl_int nWaitEvents) { return clEnqueueNDRangeKernel(queue, krnl, 1, nullptr, &global_size, &local_size, wait == nullptr ? 
0 : nWaitEvents, wait, pEvent); } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index cca634fba65fc..29b71017e9f73 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -13,16 +13,17 @@ /// \author David Rohr #include "GPUReconstructionOCLIncludesHost.h" +#include "GPUReconstructionKernelIncludes.h" template <> -inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { cl_int4 val0 = {0, 0, 0, 0}; GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); } template -inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args) { cl_kernel k = getKernelObject(); auto& x = _xyz.x; @@ -33,14 +34,14 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu cl_event ev; cl_event* evr; bool tmpEvent = false; - if (z.ev == nullptr && mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { + if (z.ev == nullptr && GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0) { evr = &ev; tmpEvent = true; } else { evr = (cl_event*)z.ev; } GPUChkErr(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); - if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { + if (GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0) { cl_ulong time_start, time_end; GPUChkErr(clWaitForEvents(1, evr)); GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); @@ -53,13 +54,13 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu } template -void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionOCL::runKernelBackend(const krnlSetupArgs& args) { std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); } template -int32_t GPUReconstructionOCLBackend::AddKernel() +int32_t GPUReconstructionOCL::AddKernel() { std::string name(GetKernelName()); std::string kname("krnl_" + name); @@ -75,12 +76,12 @@ int32_t GPUReconstructionOCLBackend::AddKernel() } template -S& GPUReconstructionOCLBackend::getKernelObject() +S& GPUReconstructionOCL::getKernelObject() { return mInternals->kernels[GetKernelNum()]; } -int32_t GPUReconstructionOCLBackend::AddKernels() +int32_t GPUReconstructionOCL::AddKernels() { #define GPUCA_KRNL(x_class, ...) 
\ if (AddKernel()) { \ @@ -91,6 +92,6 @@ int32_t GPUReconstructionOCLBackend::AddKernels() return 0; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionOCL::runKernelBackend(const krnlSetupArgs& args); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index e82799b9e59c3..0cd302cc0be94 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -113,7 +113,7 @@ set(HDRS_INSTALL Base/GPUReconstructionIO.h Base/GPUReconstructionIncludesITS.h Base/GPUReconstructionKernelMacros.h - Base/GPUReconstructionKernels.h + Base/GPUReconstructionCPUKernels.h DataCompression/GPUTPCClusterRejection.h DataCompression/GPUTPCCompressionKernels.inc DataCompression/TPCClusterDecompressionCore.inc @@ -127,6 +127,7 @@ set(HDRS_INSTALL DataTypes/GPUTRDDef.h DataTypes/GPUTRDInterfaceO2Track.h DataTypes/GPUTriggerOutputs.h + DataTypes/GPUKernelClassesFwd.h Debug/GPUROOTDump.h Definitions/GPUDefConstantsAndSettings.h Definitions/GPUDefParametersWrapper.h @@ -137,6 +138,7 @@ set(HDRS_INSTALL Definitions/GPULogging.h Definitions/GPUSettingsList.h Global/GPUChainTrackingDefs.h + Global/GPUChainTrackingGetters.inc Global/GPUErrorCodes.h Merger/GPUTPCGMBorderTrack.h Merger/GPUTPCGMMergedTrack.h @@ -217,11 +219,11 @@ set(SRCS_NO_H ${SRCS_NO_H} set(HDRS_INSTALL ${HDRS_INSTALL} ITS/GPUITSTrack.h - TPCClusterFinder/Array2D.h + TPCClusterFinder/CfArray2D.h TPCClusterFinder/CfConsts.h TPCClusterFinder/CfFragment.h TPCClusterFinder/CfUtils.h - TPCClusterFinder/ChargePos.h + TPCClusterFinder/CfChargePos.h Definitions/clusterFinderDefs.h TPCClusterFinder/PackedCharge.h TPCClusterFinder/GPUTPCCFChainContext.h) diff --git 
a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 8a22545314252..2a0c5b58d8a83 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -17,6 +17,7 @@ #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" #include "GPUDefParametersRuntime.h" +#include "GPUConstantMem.h" using namespace o2::gpu; @@ -123,7 +124,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters = io.clustersNative->nClustersTotal; mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; - mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); + mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include mMaxTracks = mRec->GetConstantMem().tpcMerger.NOutputTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); diff --git a/GPU/GPUTracking/DataTypes/GPUKernelClassesFwd.h b/GPU/GPUTracking/DataTypes/GPUKernelClassesFwd.h new file mode 100644 index 0000000000000..405eb339dea3b --- /dev/null +++ b/GPU/GPUTracking/DataTypes/GPUKernelClassesFwd.h @@ -0,0 +1,40 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUKernelClassesFwd.h +/// \author David Rohr + +#ifndef GPUKERNELCLASSESFWDN_H +#define GPUKERNELCLASSESFWDN_H + +#include "GPUTRDDef.h" + +namespace o2::gpu +{ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) class GPUCA_M_FIRST(GPUCA_M_STRIP(x_class)); +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL + +struct GPUTPCClusterOccupancyMapBin; +namespace gputpcgmmergertypes +{ +struct GPUTPCGMBorderRange; +} +struct GPUTPCLinearLabels; +struct CfChargePos; +} // namespace o2::gpu + +namespace o2::tpc +{ +struct ClusterNative; +} // namespace o2::tpc + +#endif diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 1e99e3b73736f..5df324fcba648 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -16,11 +16,14 @@ #define GPUCHAIN_H #include "GPUReconstructionCPU.h" +#include "GPUReconstructionCPUKernels.h" +#include "GPUKernelClassesFwd.h" #include namespace o2::gpu { + class GPUChain { friend class GPUReconstruction; @@ -30,10 +33,10 @@ class GPUChain using GeneralStep = GPUReconstruction::GeneralStep; using InOutPointerType = GPUReconstruction::InOutPointerType; using GeometryType = GPUReconstruction::GeometryType; - using krnlRunRange = gpu_reconstruction_kernels::krnlRunRange; - using krnlExec = gpu_reconstruction_kernels::krnlExec; - using krnlEvent = gpu_reconstruction_kernels::krnlEvent; - using deviceEvent = gpu_reconstruction_kernels::deviceEvent; + using krnlRunRange = GPUReconstructionProcessing::krnlRunRange; + using krnlExec = GPUReconstructionProcessing::krnlExec; + using krnlEvent = GPUReconstructionProcessing::krnlEvent; + using deviceEvent = GPUReconstructionProcessing::deviceEvent; static constexpr krnlRunRange krnlRunRangeNone{0}; static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0}; @@ -56,20 +59,20 @@ class GPUChain virtual void DumpSettings(const char* dir = "") {} virtual void ReadSettings(const 
char* dir = "") {} - const GPUParam& GetParam() const { return mRec->mHostConstantMem->param; } - const GPUSettingsGRP& GetGRPSettings() const { return mRec->mGRPSettings; } - const GPUCalibObjectsConst& calib() const { return processors()->calibObjects; } + const GPUParam& GetParam() const { return mRec->GetParam(); } + const GPUSettingsGRP& GetGRPSettings() const { return mRec->GetGRPSettings(); } + const GPUCalibObjectsConst& GetCalib() const { return mRec->GetCalib(); } GPUReconstruction* rec() { return mRec; } const GPUReconstruction* rec() const { return mRec; } - inline const GPUConstantMem* GetProcessors() { return mRec->processors(); } + inline const GPUConstantMem* GetProcessors() const { return mRec->processors(); } // Make functions from GPUReconstruction*** available GPUReconstruction::RecoStepField GetRecoSteps() const { return mRec->GetRecoSteps(); } GPUReconstruction::RecoStepField GetRecoStepsGPU() const { return mRec->GetRecoStepsGPU(); } GPUReconstruction::InOutTypeField GetRecoStepsInputs() const { return mRec->GetRecoStepsInputs(); } GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const { return mRec->GetRecoStepsOutputs(); } - inline const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return mRec->mDeviceBackendSettings; } - inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->mProcessingSettings; } + inline const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return mRec->GetDeviceBackendSettings(); } + inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->GetProcessingSettings(); } protected: GPUReconstructionCPU* mRec; @@ -102,7 +105,7 @@ class GPUChain } inline bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return mRec->IsEventDone(evList, nEvents); } inline void RecordMarker(deviceEvent* ev, int32_t stream) { mRec->RecordMarker(ev, stream); } - virtual inline std::unique_ptr GetThreadContext() { return mRec->GetThreadContext(); } + 
virtual inline std::unique_ptr GetThreadContext() { return mRec->GetThreadContext(); } inline void SynchronizeGPU() { mRec->SynchronizeGPU(); } inline void ReleaseEvent(deviceEvent ev, bool doGPU = true) { @@ -171,13 +174,16 @@ class GPUChain { mRec->ReadStructFromFile(file, obj); } + template - inline void runKernel(gpu_reconstruction_kernels::krnlSetup&& setup, Args&&... args) + requires(sizeof(S) >= 0) // Yields better incomplete type errors than calling runKernelCallInterface directly + inline void runKernel(GPUReconstructionProcessing::krnlSetup&& setup, Args const&... args) { - return mRec->runKernel(std::forward(setup), std::forward(args)...); + runKernelCallInterface(std::forward(setup), args...); } + template - gpu_reconstruction_kernels::krnlProperties getKernelProperties() + GPUReconstructionProcessing::krnlProperties getKernelProperties() { return mRec->getKernelProperties(); } @@ -233,6 +239,16 @@ class GPUChain private: template void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args); + +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ + template \ + requires(std::is_same_v && I == S::GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_class), defaultKernel))) \ + inline void runKernelCallInterface(GPUReconstructionProcessing::krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ + { \ + mRec->runKernelInterface(std::forward(setup) GPUCA_M_STRIP(x_forward)); \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL }; template @@ -243,7 +259,7 @@ inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... 
} HighResTimer* timer = nullptr; size_t* bytes = nullptr; - if (mRec->mProcessingSettings.debugLevel >= 1 && toGPU >= 0) { // Todo: time special cases toGPU < 0 + if (mRec->GetProcessingSettings().debugLevel >= 1 && toGPU >= 0) { // Todo: time special cases toGPU < 0 int32_t id = mRec->getRecoStepNum(step, false); if (id != -1) { auto& tmp = mRec->mTimersRecoSteps[id]; diff --git a/GPU/GPUTracking/Global/GPUChainITS.cxx b/GPU/GPUTracking/Global/GPUChainITS.cxx index 640b92a0eb0f4..eeead79b1840b 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.cxx +++ b/GPU/GPUTracking/Global/GPUChainITS.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainITS.h" +#include "GPUConstantMem.h" #include "DataFormatsITS/TrackITS.h" #include "ITStracking/ExternalAllocator.h" #include "GPUReconstructionIncludesITS.h" diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 43fa49ff74817..a3f9b996e070d 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -18,6 +18,8 @@ #include #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" +#include "GPUReconstructionIO.h" #include "GPUChainTrackingDefs.h" #include "GPUTPCClusterData.h" #include "GPUTPCSectorOutCluster.h" @@ -755,7 +757,7 @@ int32_t GPUChainTracking::RunChain() } } - if (GetProcessingSettings().trdTrackModelO2 ? 
runRecoStep(RecoStep::TRDTracking, &GPUChainTracking::RunTRDTracking) : runRecoStep(RecoStep::TRDTracking, &GPUChainTracking::RunTRDTracking)) { + if (runRecoStep(RecoStep::TRDTracking, &GPUChainTracking::RunTRDTracking)) { return 1; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 5779cec31130c..8664652b549e3 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -24,6 +24,12 @@ #include #include +namespace o2::dataformats +{ +template +class ConstMCTruthContainer; +} // namespace o2::dataformats + namespace o2::trd { class GeometryFlat; @@ -39,6 +45,9 @@ class CalibdEdxContainer; namespace o2::base { class MatLayerCylSet; +template +class PropagatorImpl; +using Propagator = PropagatorImpl; } // namespace o2::base namespace o2::gpu @@ -55,6 +64,8 @@ struct GPUChainTrackingFinalContext; struct GPUTPCCFChainContext; struct GPUNewCalibValues; struct GPUTriggerOutputs; +struct CfFragment; +class GPUTPCClusterFinder; class GPUChainTracking : public GPUChain { @@ -137,11 +148,6 @@ class GPUChainTracking : public GPUChain void ConvertZSFilter(bool zs12bit); // Getters for external usage of tracker classes - GPUTRDTrackerGPU* GetTRDTrackerGPU() { return &processors()->trdTrackerGPU; } - GPUTPCTracker* GetTPCSectorTrackers() { return processors()->tpcTrackers; } - const GPUTPCTracker* GetTPCSectorTrackers() const { return processors()->tpcTrackers; } - const GPUTPCGMMerger& GetTPCMerger() const { return processors()->tpcMerger; } - GPUTPCGMMerger& GetTPCMerger() { return processors()->tpcMerger; } GPUDisplayInterface* GetEventDisplay() { return mEventDisplay.get(); } const GPUQA* GetQA() const { return mQAFromForeignChain ? mQAFromForeignChain->mQA.get() : mQA.get(); } GPUQA* GetQA() { return mQAFromForeignChain ? 
mQAFromForeignChain->mQA.get() : mQA.get(); } @@ -155,7 +161,6 @@ class GPUChainTracking : public GPUChain int32_t ForwardTPCDigits(); int32_t RunTPCTrackingSectors(); int32_t RunTPCTrackingMerger(bool synchronizeOutput = true); - template int32_t RunTRDTracking(); template int32_t DoTRDGPUTracking(T* externalInstance = nullptr); @@ -164,22 +169,22 @@ class GPUChainTracking : public GPUChain int32_t RunRefit(); // Getters / setters for parameters - const CorrectionMapsHelper* GetTPCTransformHelper() const { return processors()->calibObjects.fastTransformHelper; } - const TPCPadGainCalib* GetTPCPadGainCalib() const { return processors()->calibObjects.tpcPadGain; } - const TPCZSLinkMapping* GetTPCZSLinkMapping() const { return processors()->calibObjects.tpcZSLinkMapping; } - const o2::tpc::CalibdEdxContainer* GetdEdxCalibContainer() const { return processors()->calibObjects.dEdxCalibContainer; } - const o2::base::MatLayerCylSet* GetMatLUT() const { return processors()->calibObjects.matLUT; } - const GPUTRDGeometry* GetTRDGeometry() const { return (GPUTRDGeometry*)processors()->calibObjects.trdGeometry; } - const o2::base::Propagator* GetO2Propagator() const { return processors()->calibObjects.o2Propagator; } + const CorrectionMapsHelper* GetTPCTransformHelper() const; + const TPCPadGainCalib* GetTPCPadGainCalib() const; + const TPCZSLinkMapping* GetTPCZSLinkMapping() const; + const o2::tpc::CalibdEdxContainer* GetdEdxCalibContainer() const; + const o2::base::MatLayerCylSet* GetMatLUT() const; + const GPUTRDGeometry* GetTRDGeometry() const; + const o2::base::Propagator* GetO2Propagator() const; const o2::base::Propagator* GetDeviceO2Propagator(); void SetTPCFastTransform(std::unique_ptr&& tpcFastTransform, std::unique_ptr&& tpcTransformHelper); void SetMatLUT(std::unique_ptr&& lut); void SetTRDGeometry(std::unique_ptr&& geo); - void SetMatLUT(const o2::base::MatLayerCylSet* lut) { processors()->calibObjects.matLUT = lut; } - void SetTRDGeometry(const 
o2::trd::GeometryFlat* geo) { processors()->calibObjects.trdGeometry = geo; } + void SetMatLUT(const o2::base::MatLayerCylSet* lut); + void SetTRDGeometry(const o2::trd::GeometryFlat* geo); void SetO2Propagator(const o2::base::Propagator* prop); - void SetCalibObjects(const GPUCalibObjectsConst& obj) { processors()->calibObjects = obj; } - void SetCalibObjects(const GPUCalibObjects& obj) { memcpy((void*)&processors()->calibObjects, (const void*)&obj, sizeof(obj)); } + void SetCalibObjects(const GPUCalibObjectsConst& obj); + void SetCalibObjects(const GPUCalibObjects& obj); void SetUpdateCalibObjects(const GPUCalibObjectsConst& obj, const GPUNewCalibValues& vals); void SetSubOutputControl(int32_t i, GPUOutputControl* v) { mSubOutputControls[i] = v; } void SetFinalInputCallback(std::function v) { mWaitForFinalInputs = v; } @@ -298,6 +303,8 @@ class GPUChainTracking : public GPUChain void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType); void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts); bool NeedTPCClustersOnGPU(); + template + int32_t RunTRDTrackingInternal(); uint32_t StreamForSector(uint32_t sector) const; std::mutex mMutexUpdateCalib; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 7db0ba66305e9..981d565852d28 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -19,11 +19,20 @@ #include "GPUMemorySizeScalers.h" #include "GPUTrackingInputProvider.h" #include "GPUNewCalibValues.h" -#include - -#ifdef GPUCA_O2_LIB -#include "CommonDataFormat/InteractionRecord.h" -#endif +#include "GPUConstantMem.h" +#include "CfChargePos.h" +#include "CfArray2D.h" +#include "GPUGeneralKernels.h" +#include "GPUTPCCFStreamCompaction.h" +#include "GPUTPCCFChargeMapFiller.h" +#include 
"GPUTPCCFDecodeZS.h" +#include "GPUTPCCFCheckPadBaseline.h" +#include "GPUTPCCFPeakFinder.h" +#include "GPUTPCCFNoiseSuppression.h" +#include "GPUTPCCFDeconvolution.h" +#include "GPUTPCCFClusterizer.h" +#include "GPUTPCCFGather.h" +#include "GPUTPCCFMCLabelFlattener.h" #include "GPUTriggerOutputs.h" #include "GPUHostDataTypes.h" #include "GPUTPCCFChainContext.h" @@ -32,18 +41,24 @@ #include "DataFormatsTPC/Digit.h" #include "DataFormatsTPC/Constants.h" #include "TPCBase/RDHUtils.h" +#include "GPULogging.h" + +#ifdef GPUCA_HAS_ONNX +#include "GPUTPCNNClusterizerKernels.h" +#include "GPUTPCNNClusterizerHost.h" +#endif + +#ifdef GPUCA_O2_LIB +#include "CommonDataFormat/InteractionRecord.h" +#endif #include "utils/strtag.h" +#include #ifndef GPUCA_NO_VC #include #endif -#ifdef GPUCA_HAS_ONNX -#include "GPUTPCNNClusterizerKernels.h" -#include "GPUTPCNNClusterizerHost.h" -#endif - using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; @@ -791,7 +806,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) using ChargeMapType = decltype(*clustererShadow.mPchargeMap); using PeakMapType = decltype(*clustererShadow.mPpeakMap); - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
+ runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); if (fragment.index == 0) { runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 8fb6fc4771658..fc07a91004c5f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -19,6 +19,9 @@ #include "GPUTPCCFChainContext.h" #include "TPCClusterDecompressor.h" #include "GPUDefParametersRuntime.h" +#include "GPUConstantMem.h" // TODO: Try to get rid of as many GPUConstantMem includes as possible! 
+#include "GPUTPCCompressionKernels.h" +#include "GPUTPCDecompressionKernels.h" #include "utils/strtag.h" #include diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index c42d9622f5332..5d05cd6a97776 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -15,6 +15,8 @@ #include "GPUChainTracking.h" #include "GPUTrackingInputProvider.h" #include "GPUMemorySizeScalers.h" +#include "GPUConstantMem.h" +#include "GPUTPCClusterFilter.h" #include #include #include @@ -23,8 +25,6 @@ #include "bitmapfile.h" #endif -#include "GPUTPCClusterFilter.h" - #define PROFILE_MAX_SIZE (100 * 1024 * 1024) using namespace o2::gpu; @@ -209,7 +209,7 @@ void GPUChainTracking::PrintDebugOutput() void GPUChainTracking::PrintOutputStat() { int32_t nTracks = 0, nAttachedClusters = 0, nAttachedClustersFitted = 0, nAdjacentClusters = 0; - uint32_t nCls = GetProcessingSettings().doublePipeline ? mIOPtrs.clustersNative->nClustersTotal : GetTPCMerger().NMaxClusters(); + uint32_t nCls = GetProcessingSettings().doublePipeline ? mIOPtrs.clustersNative->nClustersTotal : processors()->tpcMerger.NMaxClusters(); if (GetProcessingSettings().createO2Output > 1) { nTracks = mIOPtrs.nOutputTracksTPCO2; nAttachedClusters = mIOPtrs.nMergedTrackHits; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h index dc1a665e6052c..e02419955001a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUChainTracking.h +/// \file GPUChainTrackingDefs.h /// \author David Rohr #ifndef GPUCHAINTRACKINGDEFS_H diff --git a/GPU/GPUTracking/Global/GPUChainTrackingGetters.inc b/GPU/GPUTracking/Global/GPUChainTrackingGetters.inc new file mode 100644 index 0000000000000..5b72a8f23c242 --- /dev/null +++ b/GPU/GPUTracking/Global/GPUChainTrackingGetters.inc @@ -0,0 +1,36 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUChainTrackingGetters.inc +/// \author David Rohr + +#ifndef GPUCHAINTRACKINGGETTERS_INC_H +#define GPUCHAINTRACKINGGETTERS_INC_H + +#include "GPUChainTracking.h" +#include "GPUConstantMem.h" + +namespace o2::gpu +{ +inline const CorrectionMapsHelper* GPUChainTracking::GetTPCTransformHelper() const { return processors()->calibObjects.fastTransformHelper; } +inline const TPCPadGainCalib* GPUChainTracking::GetTPCPadGainCalib() const { return processors()->calibObjects.tpcPadGain; } +inline const TPCZSLinkMapping* GPUChainTracking::GetTPCZSLinkMapping() const { return processors()->calibObjects.tpcZSLinkMapping; } +inline const o2::tpc::CalibdEdxContainer* GPUChainTracking::GetdEdxCalibContainer() const { return processors()->calibObjects.dEdxCalibContainer; } +inline const o2::base::MatLayerCylSet* GPUChainTracking::GetMatLUT() const { return processors()->calibObjects.matLUT; } +inline const GPUTRDGeometry* GPUChainTracking::GetTRDGeometry() const { return (GPUTRDGeometry*)processors()->calibObjects.trdGeometry; } +inline const 
o2::base::Propagator* GPUChainTracking::GetO2Propagator() const { return processors()->calibObjects.o2Propagator; } +inline void GPUChainTracking::SetMatLUT(const o2::base::MatLayerCylSet* lut) { processors()->calibObjects.matLUT = lut; } +inline void GPUChainTracking::SetTRDGeometry(const o2::trd::GeometryFlat* geo) { processors()->calibObjects.trdGeometry = geo; } +inline void GPUChainTracking::SetCalibObjects(const GPUCalibObjectsConst& obj) { processors()->calibObjects = obj; } +inline void GPUChainTracking::SetCalibObjects(const GPUCalibObjects& obj) { memcpy((void*)&processors()->calibObjects, (const void*)&obj, sizeof(obj)); } +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 4f7846b852b98..5e7672022b3ff 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -34,6 +34,7 @@ #include "GPUTrackingInputProvider.h" #include "TPCZSLinkMapping.h" #include "GPUTriggerOutputs.h" +#include "GPUConstantMem.h" #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index a647c213660c9..163f08634ef86 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -17,6 +17,11 @@ #include "GPUDefParametersRuntime.h" #include "GPUO2DataTypes.h" #include "GPUQA.h" +#include "GPUTPCGMMerger.h" +#include "GPUConstantMem.h" +#include "GPUTPCGMMergerGPU.h" +#include "GPUTPCGMO2Output.h" +#include "GPUTPCGlobalDebugSortKernels.h" #include "utils/strtag.h" #include diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 8d1efd7011227..4662b5464f710 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -13,8 +13,12 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "GPULogging.h" #include "GPUO2DataTypes.h" +#include "GPUTrackingRefit.h" +#include "GPUConstantMem.h" +#include "GPUTrackingRefitKernel.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 962b0922eeecc..635641c00ae14 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -20,6 +20,16 @@ #include "GPUTrackingInputProvider.h" #include "GPUTPCClusterOccupancyMap.h" #include "GPUDefParametersRuntime.h" +#include "GPUTPCExtrapolationTracking.h" +#include "GPUTPCCreateOccupancyMap.h" +#include "GPUTPCCreateTrackingData.h" +#include "GPUTPCNeighboursFinder.h" +#include "GPUTPCNeighboursCleaner.h" +#include "GPUTPCStartHitsFinder.h" +#include "GPUTPCStartHitsSorter.h" +#include "GPUTPCTrackletConstructor.h" +#include "GPUTPCTrackletSelector.h" +#include "GPUTPCSectorDebugSortKernels.h" #include "utils/strtag.h" #include diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx index 0f17bbcc26842..f9011131803e3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx @@ -21,13 +21,19 @@ #include "GPUTRDTracker.h" #include "GPUTrackingInputProvider.h" #include "GPUTRDTrackerKernels.h" +#include "GPUConstantMem.h" #include "utils/strtag.h" using namespace o2::gpu; using namespace o2::trd; -template int32_t GPUChainTracking::RunTRDTracking() +{ + return GetProcessingSettings().trdTrackModelO2 ? 
RunTRDTrackingInternal() : RunTRDTrackingInternal(); +} + +template +int32_t GPUChainTracking::RunTRDTrackingInternal() { auto& Tracker = processors()->getTRDTracker(); if (!Tracker.IsInitialized()) { @@ -189,9 +195,7 @@ int32_t GPUChainTracking::DoTRDGPUTracking(T* externalInstance) return (0); } -template int32_t GPUChainTracking::RunTRDTracking(); template int32_t GPUChainTracking::DoTRDGPUTracking(GPUTRDTrackerGPU*); template int32_t GPUChainTracking::DoTRDGPUTracking(GPUTRDTracker*); -template int32_t GPUChainTracking::RunTRDTracking(); template int32_t GPUChainTracking::DoTRDGPUTracking(GPUTRDTracker*); template int32_t GPUChainTracking::DoTRDGPUTracking(GPUTRDTrackerGPU*); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx index db5e5ae3aeb75..c9d4d269f070c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx @@ -19,9 +19,13 @@ #include "GPUTPCClusterData.h" #include "GPUReconstructionConvert.h" #include "GPUMemorySizeScalers.h" +#include "GPUTPCConvert.h" #include "AliHLTTPCRawCluster.h" +#include "GPUConstantMem.h" +#include "GPUTPCConvertKernel.h" #include "DataFormatsTPC/ClusterNative.h" +#include "DataFormatsTPC/ZeroSuppression.h" #include "CommonDataFormat/InteractionRecord.h" #include "utils/strtag.h" diff --git a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx index a5457bf3f2f23..7ef9baa903fbe 100644 --- a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx +++ b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx @@ -18,6 +18,9 @@ #include "GPUReconstruction.h" #include "GPUTPCClusterOccupancyMap.h" #include "GPUErrors.h" +#include "GPUParam.h" +#include "DataFormatsTPC/ClusterNative.h" +#include "GPUTRDSpacePoint.h" using namespace o2::gpu; using namespace o2::tpc; diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx 
b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 4dac56afed671..81eb2c285192b 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -15,6 +15,7 @@ #include "GPUO2Interface.h" #include "GPUReconstruction.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "GPUChainITS.h" #include "GPUMemorySizeScalers.h" #include "GPUOutputControl.h" @@ -23,6 +24,7 @@ #include "GPUParam.inc" #include "GPUQA.h" #include "GPUOutputControl.h" +#include "DetectorsBase/Propagator.h" #include #include #include diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 82b21e2045b8e..4f960a8e1ec76 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -757,7 +757,7 @@ struct MergeBorderTracks_compMin { } // namespace o2::gpu::internal template <> -inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { if (cmpMax) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); @@ -1839,13 +1839,13 @@ struct GPUTPCGMMergerSortTracksQPt_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> 
-inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } @@ -2065,7 +2065,7 @@ struct GPUTPCGMMergerMergeLoopers_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 1e4cc633eb4ca..2f8fbecadce5f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -36,6 +36,7 @@ #include "GPUTPCClusterOccupancyMap.h" #include "GPUTrackingRefit.h" #include "CorrectionMapsHelper.h" +#include "GPUConstantMem.h" using namespace o2::gpu; using namespace gputpcgmmergertypes; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 13b34a0a64a84..6640b556c3011 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -102,7 +102,7 @@ struct GPUTPCGMO2OutputSort_comp { }; template <> -inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const 
krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx index 7d83ff9abd91c..e66ad71783dbd 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx @@ -16,6 +16,7 @@ #include "GPUReconstruction.h" #include "GPUTPCHitId.h" #include "GPUTPCTrack.h" +#include "GPULogging.h" #include #include diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index b32db2bfebf11..d4c83f92a2157 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -17,11 +17,24 @@ #include "GPUReconstructionTimeframe.h" #include "GPUReconstructionConvert.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "GPUTPCDef.h" #include "GPUQA.h" +#include "GPUParam.h" #include "display/GPUDisplayInterface.h" #include "genEvents.h" +#include "TPCFastTransform.h" +#include "CorrectionMapsHelper.h" +#include "GPUTPCGMMergedTrack.h" +#include "GPUSettings.h" +#include "GPUConstantMem.h" + +#include "GPUO2DataTypes.h" +#include "GPUChainITS.h" + +#include "DataFormatsTPC/CompressedClusters.h" + #include #include #include @@ -32,6 +45,7 @@ #include #include #include +#include #ifndef _WIN32 #include @@ -48,15 +62,6 @@ #include "utils/qmaths_helpers.h" #include "utils/vecpod.h" -#include "TPCFastTransform.h" -#include "CorrectionMapsHelper.h" -#include "GPUTPCGMMergedTrack.h" -#include "GPUSettings.h" -#include - -#include "GPUO2DataTypes.h" -#include "GPUChainITS.h" - using namespace o2::gpu; // #define BROKEN_EVENTS @@ -915,7 +920,7 @@ int32_t main(int argc, char** argv) nEventsProcessed++; if (configStandalone.timeFrameTime) { 
- double nClusters = chainTracking->GetTPCMerger().NMaxClusters(); + double nClusters = chainTracking->GetProcessors()->tpcMerger.NMaxClusters(); if (nClusters > 0) { const int32_t nOrbits = 32; const double colRate = 50000; diff --git a/GPU/GPUTracking/Standalone/tools/createGeo.C b/GPU/GPUTracking/Standalone/tools/createGeo.C index 307d687f716d1..c454978177ad6 100644 --- a/GPU/GPUTracking/Standalone/tools/createGeo.C +++ b/GPU/GPUTracking/Standalone/tools/createGeo.C @@ -20,6 +20,7 @@ #include "GPUO2Interface.h" #include "GPUReconstruction.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Standalone/tools/createLUT.C b/GPU/GPUTracking/Standalone/tools/createLUT.C index 7bb4edbf89f18..b4a053aa46c66 100644 --- a/GPU/GPUTracking/Standalone/tools/createLUT.C +++ b/GPU/GPUTracking/Standalone/tools/createLUT.C @@ -18,6 +18,7 @@ #include "GPUO2Interface.h" #include "GPUReconstruction.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" using namespace o2::gpu; diff --git a/GPU/GPUTracking/TPCClusterFinder/Array2D.h b/GPU/GPUTracking/TPCClusterFinder/CfArray2D.h similarity index 81% rename from GPU/GPUTracking/TPCClusterFinder/Array2D.h rename to GPU/GPUTracking/TPCClusterFinder/CfArray2D.h index b62176fdc4365..3c8bcf94da4b3 100644 --- a/GPU/GPUTracking/TPCClusterFinder/Array2D.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfArray2D.h @@ -16,22 +16,22 @@ #define O2_GPU_ARRAY2D_H #include "clusterFinderDefs.h" -#include "ChargePos.h" +#include "CfChargePos.h" namespace o2::gpu { template -class AbstractArray2D +class AbstractCfArray2D { public: - GPUdi() explicit AbstractArray2D(T* d) : data(d) {} + GPUdi() explicit AbstractCfArray2D(T* d) : data(d) {} - GPUdi() T& operator[](const ChargePos& p) { return data[Layout::idx(p)]; } - GPUdi() const T& operator[](const ChargePos& p) const { return data[Layout::idx(p)]; } + GPUdi() T& operator[](const CfChargePos& p) { return 
data[Layout::idx(p)]; } + GPUdi() const T& operator[](const CfChargePos& p) const { return data[Layout::idx(p)]; } - GPUdi() void safeWrite(const ChargePos& p, const T& v) + GPUdi() void safeWrite(const CfChargePos& p, const T& v) { if (data != nullptr) { (*this)[p] = v; @@ -52,7 +52,7 @@ class TilingLayout WidthInTiles = (TPC_NUM_OF_PADS + Width - 1) / Width, }; - GPUdi() static tpccf::SizeT idx(const ChargePos& p) + GPUdi() static tpccf::SizeT idx(const CfChargePos& p) { const tpccf::SizeT tilePad = p.gpad / Width; const tpccf::SizeT tileTime = p.timePadded / Height; @@ -72,7 +72,7 @@ class TilingLayout class LinearLayout { public: - GPUdi() static tpccf::SizeT idx(const ChargePos& p) + GPUdi() static tpccf::SizeT idx(const CfChargePos& p) { return TPC_NUM_OF_PADS * p.timePadded + p.gpad; } @@ -119,7 +119,7 @@ using TPCMapMemoryLayout = LinearLayout; #endif template -using Array2D = AbstractArray2D>; +using CfArray2D = AbstractCfArray2D>; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h b/GPU/GPUTracking/TPCClusterFinder/CfChargePos.h similarity index 80% rename from GPU/GPUTracking/TPCClusterFinder/ChargePos.h rename to GPU/GPUTracking/TPCClusterFinder/CfChargePos.h index cdd489e0ef938..bf6ce2fc804ba 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfChargePos.h @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file ChargePos.h +/// \file CfChargePos.h /// \author Felix Weiglhofer #ifndef O2_GPU_CHARGE_POS_H @@ -22,20 +22,20 @@ namespace o2::gpu #define INVALID_TIME_BIN (-GPUCF_PADDING_TIME - 1) -struct ChargePos { +struct CfChargePos { tpccf::GlobalPad gpad; tpccf::TPCFragmentTime timePadded; - GPUdDefault() ChargePos() = default; + GPUdDefault() CfChargePos() = default; - constexpr GPUhdi() ChargePos(tpccf::Row row, tpccf::Pad pad, tpccf::TPCFragmentTime t) + constexpr GPUhdi() CfChargePos(tpccf::Row row, tpccf::Pad pad, tpccf::TPCFragmentTime t) : gpad(tpcGlobalPadIdx(row, pad)), timePadded(t + GPUCF_PADDING_TIME) { } - GPUdi() ChargePos(const tpccf::GlobalPad& p, const tpccf::TPCFragmentTime& t) : gpad(p), timePadded(t) {} + GPUdi() CfChargePos(const tpccf::GlobalPad& p, const tpccf::TPCFragmentTime& t) : gpad(p), timePadded(t) {} - GPUdi() ChargePos delta(const tpccf::Delta2& d) const + GPUdi() CfChargePos delta(const tpccf::Delta2& d) const { return {tpccf::GlobalPad(gpad + d.x), tpccf::TPCFragmentTime(timePadded + d.y)}; } @@ -56,7 +56,7 @@ struct ChargePos { } }; -inline constexpr ChargePos INVALID_CHARGE_POS{255, 255, INVALID_TIME_BIN}; +inline constexpr CfChargePos INVALID_CHARGE_POS{255, 255, INVALID_TIME_BIN}; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h index 75dcc166abd9b..96f4893c74af3 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h @@ -17,7 +17,7 @@ #include "clusterFinderDefs.h" #include "GPUCommonAlgorithm.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "CfConsts.h" namespace o2::gpu @@ -169,14 +169,14 @@ class CfUtils template static GPUdi() void blockLoad( - const Array2D& map, + const CfArray2D& map, uint32_t wgSize, uint32_t elems, uint16_t ll, uint32_t offset, uint32_t N, GPUconstexprref() const tpccf::Delta2* neighbors, - const ChargePos* posBcast, + const CfChargePos* posBcast, GPUgeneric() T* buf) 
{ #if defined(GPUCA_GPUCODE) @@ -186,7 +186,7 @@ class CfUtils tpccf::Delta2 d = neighbors[x + offset]; for (uint32_t i = y; i < wgSize; i += (elems / N)) { - ChargePos readFrom = posBcast[i]; + CfChargePos readFrom = posBcast[i]; uint32_t writeTo = N * i + x; buf[writeTo] = map[readFrom.delta(d)]; } @@ -196,7 +196,7 @@ class CfUtils return; } - ChargePos readFrom = posBcast[ll]; + CfChargePos readFrom = posBcast[ll]; GPUbarrier(); @@ -213,14 +213,14 @@ class CfUtils template static GPUdi() void condBlockLoad( - const Array2D& map, + const CfArray2D& map, uint16_t wgSize, uint16_t elems, uint16_t ll, uint16_t offset, uint16_t N, GPUconstexprref() const tpccf::Delta2* neighbors, - const ChargePos* posBcast, + const CfChargePos* posBcast, const uint8_t* aboveThreshold, GPUgeneric() T* buf) { @@ -230,7 +230,7 @@ class CfUtils uint16_t x = ll % N; tpccf::Delta2 d = neighbors[x + offset]; for (uint32_t i = y; i < wgSize; i += (elems / N)) { - ChargePos readFrom = posBcast[i]; + CfChargePos readFrom = posBcast[i]; uint8_t above = aboveThreshold[i]; uint32_t writeTo = N * i + x; T v(0); @@ -247,7 +247,7 @@ class CfUtils return; } - ChargePos readFrom = posBcast[ll]; + CfChargePos readFrom = posBcast[ll]; uint8_t above = aboveThreshold[ll]; GPUbarrier(); diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index 622da856af805..a80283b91c940 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -58,7 +58,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d) return q; } -GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, const Charge q, TPCTime timeOffset) +GPUd() void ClusterAccumulator::finalize(const CfChargePos& pos, const Charge q, TPCTime timeOffset) { mQtot += q; @@ -75,7 +75,7 @@ GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, const Charge q, T mTimeMean += 
timeOffset + pos.time(); } -GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, tpc::ClusterNative& cn, const GPUParam& param, const Array2D& chargeMap) +GPUd() bool ClusterAccumulator::toNative(const CfChargePos& pos, const Charge q, tpc::ClusterNative& cn, const GPUParam& param, const CfArray2D& chargeMap) { Pad pad = pos.pad(); diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index 90d977372b201..fb208ca0150d4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -17,7 +17,7 @@ #include "clusterFinderDefs.h" #include "PackedCharge.h" -#include "Array2D.h" +#include "CfArray2D.h" namespace o2 { @@ -30,7 +30,7 @@ struct ClusterNative; namespace gpu { -struct ChargePos; +struct CfChargePos; struct GPUParam; class GPUTPCGeometry; @@ -52,8 +52,8 @@ class ClusterAccumulator mSplitInTime = splitInTime; } - GPUd() void finalize(const ChargePos&, const tpccf::Charge, tpccf::TPCTime); - GPUd() bool toNative(const ChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const Array2D&); + GPUd() void finalize(const CfChargePos&, const tpccf::Charge, tpccf::TPCTime); + GPUd() bool toNative(const CfChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const CfArray2D&); private: float mQtot = 0; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx index 8dbc5804f8fb8..d2ca3d419c138 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx @@ -13,7 +13,7 @@ /// \author Felix Weiglhofer #include "GPUTPCCFChargeMapFiller.h" -#include "ChargePos.h" +#include "CfChargePos.h" #include "DataFormatsTPC/Digit.h" #include "TPCPadGainCalib.h" @@ -23,14 +23,14 @@ using namespace o2::gpu::tpccf; template <> GPUdii() 
void GPUTPCCFChargeMapFiller::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D indexMap(clusterer.mPindexMap); + CfArray2D indexMap(clusterer.mPindexMap); fillIndexMapImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer.mPmemory->fragment, clusterer.mPdigits, indexMap, clusterer.mPmemory->counters.nDigitsInFragment); } GPUd() void GPUTPCCFChargeMapFiller::fillIndexMapImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const CfFragment& fragment, const tpc::Digit* digits, - Array2D& indexMap, + CfArray2D& indexMap, size_t maxDigit) { size_t idx = get_global_id(0); @@ -39,21 +39,21 @@ GPUd() void GPUTPCCFChargeMapFiller::fillIndexMapImpl(int32_t nBlocks, int32_t n } CPU_ONLY(idx += fragment.digitsStart); CPU_ONLY(tpc::Digit digit = digits[idx]); - CPU_ONLY(ChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp()))); + CPU_ONLY(CfChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp()))); CPU_ONLY(indexMap.safeWrite(pos, idx)); } template <> GPUdii() void GPUTPCCFChargeMapFiller::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); fillFromDigitsImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, clusterer.mPmemory->counters.nPositions, clusterer.mPdigits, clusterer.mPpositions, chargeMap); } GPUd() void GPUTPCCFChargeMapFiller::fillFromDigitsImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, processorType& clusterer, const CfFragment& fragment, size_t digitNum, const tpc::Digit* digits, - ChargePos* positions, - Array2D& chargeMap) + CfChargePos* positions, + CfArray2D& chargeMap) { size_t 
idx = get_global_id(0); if (idx >= digitNum) { @@ -61,7 +61,7 @@ GPUd() void GPUTPCCFChargeMapFiller::fillFromDigitsImpl(int32_t nBlocks, int32_t } tpc::Digit digit = digits[fragment.digitsStart + idx]; - ChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp())); + CfChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp())); positions[idx] = pos; float q = digit.getChargeFloat(); q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(clusterer.mISector, digit.getRow(), digit.getPad()); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h index f7aab78c33bd1..800ba786c2105 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::tpc @@ -30,7 +30,7 @@ class Digit; namespace o2::gpu { -struct ChargePos; +struct CfChargePos; class GPUTPCCFChargeMapFiller : public GPUKernelTemplate { @@ -55,9 +55,9 @@ class GPUTPCCFChargeMapFiller : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); - static GPUd() void fillIndexMapImpl(int32_t, int32_t, int32_t, int32_t, const CfFragment&, const tpc::Digit*, Array2D&, size_t); + static GPUd() void fillIndexMapImpl(int32_t, int32_t, int32_t, int32_t, const CfFragment&, const tpc::Digit*, CfArray2D&, size_t); - static GPUd() void fillFromDigitsImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, size_t, const tpc::Digit*, ChargePos*, Array2D&); + static GPUd() void fillFromDigitsImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, size_t, const tpc::Digit*, CfChargePos*, CfArray2D&); private: static GPUd() size_t findTransition(int32_t, const tpc::Digit*, size_t, size_t); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index 1e76860331de6..ec084c308312e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -13,7 +13,7 @@ /// \author Felix Weiglhofer #include "GPUTPCCFCheckPadBaseline.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" #include "GPUTPCGeometry.h" #include "clusterFinderDefs.h" @@ -33,10 +33,10 @@ template <> GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { const CfFragment& fragment = clusterer.mPmemory->fragment; - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); int32_t basePad = iBlock * PadsPerCacheline; - ChargePos basePos = padToChargePos(basePad, clusterer); + CfChargePos basePos = padToCfChargePos(basePad, clusterer); if (not basePos.valid()) { return; @@ -55,7 +55,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThread bool handlePad = localTimeBin == 0; for (tpccf::TPCFragmentTime t = 
fragment.firstNonOverlapTimeBin(); t < fragment.lastNonOverlapTimeBin(); t += NumOfCachedTimebins) { - const ChargePos pos = basePos.delta({localPadId, int16_t(t + localTimeBin)}); + const CfChargePos pos = basePos.delta({localPadId, int16_t(t + localTimeBin)}); smem.charges[localPadId][localTimeBin] = (pos.valid()) ? chargeMap[pos].unpack() : 0; GPUbarrier(); if (handlePad) { @@ -150,7 +150,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThread #endif } -GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer) +GPUd() CfChargePos GPUTPCCFCheckPadBaseline::padToCfChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer) { constexpr GPUTPCGeometry geo; @@ -161,12 +161,12 @@ GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int32_t& pad, const GP if (0 <= padInRow && padInRow < CAMath::nextMultipleOf(npads)) { int32_t cachelineOffset = padInRow % PadsPerCacheline; pad -= cachelineOffset; - return ChargePos{r, Pad(padInRow - cachelineOffset), 0}; + return CfChargePos{r, Pad(padInRow - cachelineOffset), 0}; } padOffset += npads; } - return ChargePos{0, 0, INVALID_TIME_BIN}; + return CfChargePos{0, 0, INVALID_TIME_BIN}; } GPUd() void GPUTPCCFCheckPadBaseline::updatePadBaseline(int32_t pad, const GPUTPCClusterFinder& clusterer, int32_t totalCharges, int32_t consecCharges, Charge maxCharge) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h index d6daa6803ca39..2403aa6d29ecd 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h @@ -52,7 +52,7 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer); private: - GPUd() static ChargePos padToChargePos(int32_t& pad, const 
GPUTPCClusterFinder&); + GPUd() static CfChargePos padToCfChargePos(int32_t& pad, const GPUTPCClusterFinder&); GPUd() static void updatePadBaseline(int32_t pad, const GPUTPCClusterFinder&, int32_t totalCharges, int32_t consecCharges, tpccf::Charge maxCharge); }; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index 2131347decec6..c9c6b157499f2 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -30,7 +30,7 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t onlyMC) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CPU_ONLY(MCLabelAccumulator labelAcc(clusterer)); tpc::ClusterNative* clusterOut = (onlyMC) ? 
nullptr : clusterer.mPclusterByRow; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h index 79f3325ed9ad2..466d13d3254de 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::tpc @@ -38,7 +38,7 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate public: static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFClusterizer); struct GPUSharedMemory { - ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_BUILD_N]; uint8_t innerAboveThreshold[SCRATCH_PAD_WORK_GROUP_SIZE]; }; @@ -57,16 +57,16 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t); - static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const Array2D&, const ChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*); + static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const CfArray2D&, const CfChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*); - static GPUd() void buildCluster(const GPUSettingsRec&, const Array2D&, ChargePos, ChargePos*, PackedCharge*, uint8_t*, ClusterAccumulator*, MCLabelAccumulator*); + static GPUd() void buildCluster(const GPUSettingsRec&, const CfArray2D&, 
CfChargePos, CfChargePos*, PackedCharge*, uint8_t*, ClusterAccumulator*, MCLabelAccumulator*); static GPUd() uint32_t sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*); private: - static GPUd() void updateClusterInner(const GPUSettingsRec&, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*, uint8_t*); + static GPUd() void updateClusterInner(const GPUSettingsRec&, uint16_t, uint16_t, const PackedCharge*, const CfChargePos&, ClusterAccumulator*, MCLabelAccumulator*, uint8_t*); - static GPUd() void updateClusterOuter(uint16_t, uint16_t, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*); + static GPUd() void updateClusterOuter(uint16_t, uint16_t, uint16_t, uint16_t, const PackedCharge*, const CfChargePos&, ClusterAccumulator*, MCLabelAccumulator*); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc index 8a6b73be8bd8d..e32abbf37584f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc @@ -19,8 +19,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t processorType& clusterer, const CfFragment& fragment, GPUSharedMemory& smem, - const Array2D& chargeMap, - const ChargePos* filteredPeakPositions, + const CfArray2D& chargeMap, + const CfChargePos* filteredPeakPositions, const GPUSettingsRec& calib, MCLabelAccumulator* labelAcc, uint32_t clusternum, @@ -34,7 +34,7 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t // For certain configurations dummy work items are added, so the total // number of work items is dividable by 64. // These dummy items also compute the last cluster but discard the result. 
- ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; + CfChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; Charge charge = chargeMap[pos].unpack(); ClusterAccumulator pc; @@ -94,7 +94,7 @@ GPUdii() void GPUTPCCFClusterizer::updateClusterInner( uint16_t lid, uint16_t N, const PackedCharge* buf, - const ChargePos& pos, + const CfChargePos& pos, ClusterAccumulator* cluster, MCLabelAccumulator* labelAcc, uint8_t* innerAboveThreshold) @@ -125,7 +125,7 @@ GPUdii() void GPUTPCCFClusterizer::updateClusterOuter( uint16_t M, uint16_t offset, const PackedCharge* buf, - const ChargePos& pos, + const CfChargePos& pos, ClusterAccumulator* cluster, MCLabelAccumulator* labelAcc) { @@ -144,9 +144,9 @@ GPUdii() void GPUTPCCFClusterizer::updateClusterOuter( GPUdii() void GPUTPCCFClusterizer::buildCluster( const GPUSettingsRec& calib, - const Array2D& chargeMap, - ChargePos pos, - ChargePos* posBcast, + const CfArray2D& chargeMap, + CfChargePos pos, + CfChargePos* posBcast, PackedCharge* buf, uint8_t* innerAboveThreshold, ClusterAccumulator* myCluster, diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index 6662b93eccb78..312085d2947ab 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -15,7 +15,7 @@ #include "GPUTPCCFDecodeZS.h" #include "GPUCommonMath.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" #include "CfUtils.h" #include "CommonConstants/LHCConstants.h" @@ -53,8 +53,8 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared if (zs.count[endpoint] == 0) { return; } - ChargePos* positions = clusterer.mPpositions; - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfChargePos* positions = clusterer.mPpositions; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); const size_t 
nDigits = clusterer.mPzsOffsets[iBlock].offset; if (iThread == 0) { const int32_t region = endpoint / 2; @@ -175,7 +175,7 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared TPCTime globalTime = timeBin + l; bool inFragment = fragment.contains(globalTime); Row row = rowOffset + m; - ChargePos pos(row, Pad(pad), inFragment ? fragment.toLocal(globalTime) : INVALID_TIME_BIN); + CfChargePos pos(row, Pad(pad), inFragment ? fragment.toLocal(globalTime) : INVALID_TIME_BIN); positions[nDigitsTmp++] = pos; if (inFragment) { @@ -552,7 +552,7 @@ GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorTy GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset) { const uint32_t sector = clusterer.mISector; - ChargePos* positions = clusterer.mPpositions; + CfChargePos* positions = clusterer.mPpositions; #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (padAndRow.getRow() >= GPUCA_ROW_COUNT) { positions[positionOffset] = INVALID_CHARGE_POS; @@ -560,9 +560,9 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, floa return; } #endif - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - ChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime); + CfChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime); positions[positionOffset] = pos; charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad()); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx index dab8123698abf..429d51685e504 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx @@ -15,7 +15,7 @@ #include "GPUTPCCFDeconvolution.h" #include 
"CfConsts.h" #include "CfUtils.h" -#include "ChargePos.h" +#include "CfChargePos.h" #include "GPUDefMacros.h" using namespace o2::gpu; @@ -24,15 +24,15 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFDeconvolution::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); GPUTPCCFDeconvolution::deconvolutionImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, isPeakMap, chargeMap, clusterer.mPpositions, clusterer.mPmemory->counters.nPositions); } GPUdii() void GPUTPCCFDeconvolution::deconvolutionImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, - const Array2D& peakMap, - Array2D& chargeMap, - const ChargePos* positions, + const CfArray2D& peakMap, + CfArray2D& chargeMap, + const CfChargePos* positions, const uint32_t digitnum) { SizeT idx = get_global_id(0); @@ -40,7 +40,7 @@ GPUdii() void GPUTPCCFDeconvolution::deconvolutionImpl(int32_t nBlocks, int32_t bool iamDummy = (idx >= digitnum); idx = iamDummy ? 
digitnum - 1 : idx; - ChargePos pos = positions[idx]; + CfChargePos pos = positions[idx]; bool iamPeak = CfUtils::isPeak(peakMap[pos]); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h index 78fcc8ba1785a..e971a042e95a4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h @@ -20,7 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::gpu @@ -31,7 +31,7 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate public: static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFDeconvolution); struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { - ChargePos posBcast1[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast1[SCRATCH_PAD_WORK_GROUP_SIZE]; uint8_t aboveThresholdBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; uint8_t buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_COUNT_N]; }; @@ -51,7 +51,7 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); private: - static GPUd() void deconvolutionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const Array2D&, Array2D&, const ChargePos*, const uint32_t); + static GPUd() void deconvolutionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const CfArray2D&, CfArray2D&, const CfChargePos*, const uint32_t); static GPUdi() uint8_t countPeaksInner(uint16_t, const uint8_t*, uint8_t*); static GPUdi() uint8_t countPeaksOuter(uint16_t, uint8_t, const uint8_t*); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx index f3a914cbfcaee..4dfa50d9439e4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx @@ -13,10 +13,10 @@ /// \author Felix Weiglhofer #include "GPUTPCCFNoiseSuppression.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "CfConsts.h" #include "CfUtils.h" -#include "ChargePos.h" +#include "CfChargePos.h" using namespace o2::gpu; using namespace o2::gpu::tpccf; @@ -24,29 +24,29 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFNoiseSuppression::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); noiseSuppressionImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, clusterer.Param().rec, chargeMap, isPeakMap, clusterer.mPpeakPositions, clusterer.mPmemory->counters.nPeaks, clusterer.mPisPeak); } template <> GPUdii() void GPUTPCCFNoiseSuppression::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D isPeakMap(clusterer.mPpeakMap); + CfArray2D 
isPeakMap(clusterer.mPpeakMap); updatePeaksImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer.mPpeakPositions, clusterer.mPisPeak, clusterer.mPmemory->counters.nPeaks, isPeakMap); } GPUdii() void GPUTPCCFNoiseSuppression::noiseSuppressionImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, const GPUSettingsRec& calibration, - const Array2D& chargeMap, - const Array2D& peakMap, - const ChargePos* peakPositions, + const CfArray2D& chargeMap, + const CfArray2D& peakMap, + const CfChargePos* peakPositions, const uint32_t peaknum, uint8_t* isPeakPredicate) { SizeT idx = get_global_id(0); - ChargePos pos = peakPositions[CAMath::Min(idx, (SizeT)(peaknum - 1))]; + CfChargePos pos = peakPositions[CAMath::Min(idx, (SizeT)(peaknum - 1))]; Charge charge = chargeMap[pos].unpack(); uint64_t minimas, bigger, peaksAround; @@ -75,10 +75,10 @@ GPUdii() void GPUTPCCFNoiseSuppression::noiseSuppressionImpl(int32_t nBlocks, in } GPUd() void GPUTPCCFNoiseSuppression::updatePeaksImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, - const ChargePos* peakPositions, + const CfChargePos* peakPositions, const uint8_t* isPeak, const uint32_t peakNum, - Array2D& peakMap) + CfArray2D& peakMap) { SizeT idx = get_global_id(0); @@ -86,7 +86,7 @@ GPUd() void GPUTPCCFNoiseSuppression::updatePeaksImpl(int32_t nBlocks, int32_t n return; } - ChargePos pos = peakPositions[idx]; + CfChargePos pos = peakPositions[idx]; uint8_t peak = isPeak[idx]; @@ -164,12 +164,12 @@ GPUdi() bool GPUTPCCFNoiseSuppression::keepPeak( } GPUd() void GPUTPCCFNoiseSuppression::findMinimaAndPeaks( - const Array2D& chargeMap, - const Array2D& peakMap, + const CfArray2D& chargeMap, + const CfArray2D& peakMap, const GPUSettingsRec& calibration, float q, - const ChargePos& pos, - ChargePos* posBcast, + const CfChargePos& pos, + CfChargePos* posBcast, PackedCharge* buf, uint64_t* minimas, uint64_t* bigger, diff --git 
a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h index 71236bc317443..59196da11079b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h @@ -19,13 +19,13 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::gpu { -struct ChargePos; +struct CfChargePos; class GPUTPCCFNoiseSuppression : public GPUKernelTemplate { @@ -38,7 +38,7 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks)); struct GPUSharedMemory { - ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_NOISE_N]; }; @@ -57,9 +57,9 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); private: - static GPUd() void noiseSuppressionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const GPUSettingsRec&, const Array2D&, const Array2D&, const ChargePos*, const uint32_t, uint8_t*); + static GPUd() void noiseSuppressionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const GPUSettingsRec&, const CfArray2D&, const CfArray2D&, const CfChargePos*, const uint32_t, uint8_t*); - static GPUd() void updatePeaksImpl(int32_t, int32_t, int32_t, int32_t, const ChargePos*, const uint8_t*, const uint32_t, Array2D&); + static GPUd() void updatePeaksImpl(int32_t, int32_t, int32_t, int32_t, const CfChargePos*, const uint8_t*, const uint32_t, CfArray2D&); static GPUdi() void checkForMinima(const float, const float, const float, PackedCharge, int32_t, uint64_t*, uint64_t*); @@ -69,7 +69,7 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate static GPUdi() bool keepPeak(uint64_t, uint64_t); - static GPUd() void findMinimaAndPeaks(const Array2D&, const Array2D&, const GPUSettingsRec&, float, const ChargePos&, ChargePos*, PackedCharge*, uint64_t*, uint64_t*, uint64_t*); + static GPUd() void findMinimaAndPeaks(const CfArray2D&, const CfArray2D&, const GPUSettingsRec&, float, const CfChargePos&, CfChargePos*, PackedCharge*, uint64_t*, uint64_t*, uint64_t*); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx index 1de922f716c14..6749ab8e8485e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx @@ -14,7 +14,7 @@ #include "GPUTPCCFPeakFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "CfUtils.h" #include "PackedCharge.h" #include "TPCPadGainCalib.h" @@ -25,19 +25,19 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFPeakFinder::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, 
processorType& clusterer) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); findPeaksImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, chargeMap, clusterer.mPpadIsNoisy, clusterer.mPpositions, clusterer.mPmemory->counters.nPositions, clusterer.Param().rec, *clusterer.GetConstantMem()->calibObjects.tpcPadGain, clusterer.mPisPeak, isPeakMap); } GPUdii() bool GPUTPCCFPeakFinder::isPeak( GPUSharedMemory& smem, Charge q, - const ChargePos& pos, + const CfChargePos& pos, uint16_t N, - const Array2D& chargeMap, + const CfArray2D& chargeMap, const GPUSettingsRec& calib, - ChargePos* posBcast, + CfChargePos* posBcast, PackedCharge* buf) { uint16_t ll = get_local_id(0); @@ -91,21 +91,21 @@ GPUdii() bool GPUTPCCFPeakFinder::isPeak( } GPUd() void GPUTPCCFPeakFinder::findPeaksImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, - const Array2D& chargeMap, + const CfArray2D& chargeMap, const uint8_t* padHasLostBaseline, - const ChargePos* positions, + const CfChargePos* positions, SizeT digitnum, const GPUSettingsRec& calib, const TPCPadGainCalib& gainCorrection, // Only used for globalPad() function uint8_t* isPeakPredicate, - Array2D& peakMap) + CfArray2D& peakMap) { SizeT idx = get_global_id(0); // For certain configurations dummy work items are added, so the total // number of work items is dividable by 64. // These dummy items also compute the last digit but discard the result. - ChargePos pos = positions[CAMath::Min(idx, (SizeT)(digitnum - 1))]; + CfChargePos pos = positions[CAMath::Min(idx, (SizeT)(digitnum - 1))]; Charge charge = pos.valid() ? 
chargeMap[pos].unpack() : Charge(0); bool hasLostBaseline = padHasLostBaseline[gainCorrection.globalPad(pos.row(), pos.pad())]; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h index ec17d98322239..e480518ddc9dd 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h @@ -19,20 +19,20 @@ #include "GPUConstantMem.h" #include "clusterFinderDefs.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::gpu { -struct ChargePos; +struct CfChargePos; class GPUTPCCFPeakFinder : public GPUKernelTemplate { public: static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFPeakFinder); struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { - ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_SEARCH_N]; }; @@ -51,9 +51,9 @@ class GPUTPCCFPeakFinder : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); private: - static GPUd() void findPeaksImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const Array2D&, const uint8_t*, const ChargePos*, tpccf::SizeT, const GPUSettingsRec&, const TPCPadGainCalib&, uint8_t*, Array2D&); + static GPUd() void findPeaksImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const CfArray2D&, const uint8_t*, const CfChargePos*, tpccf::SizeT, const GPUSettingsRec&, const TPCPadGainCalib&, uint8_t*, CfArray2D&); - static GPUd() bool isPeak(GPUSharedMemory&, tpccf::Charge, const ChargePos&, uint16_t, const Array2D&, const GPUSettingsRec&, ChargePos*, PackedCharge*); + static GPUd() bool isPeak(GPUSharedMemory&, tpccf::Charge, const CfChargePos&, uint16_t, const CfArray2D&, const GPUSettingsRec&, CfChargePos*, PackedCharge*); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx index efed3643800b6..1da5a1158a8c2 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx @@ -15,7 +15,7 @@ #include "GPUTPCCFStreamCompaction.h" #include "GPUCommonAlgorithm.h" -#include "ChargePos.h" +#include "CfChargePos.h" #include "CfUtils.h" using namespace o2::gpu; @@ -92,7 +92,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread -GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, ChargePos* in, ChargePos* out) +GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, CfChargePos* in, CfChargePos* out) { uint32_t nElems = CompactionElems(clusterer, stage); SizeT bufferSize = (stage) ? 
clusterer.mNMaxClusters : clusterer.mNMaxPeaks; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx index 613c4ad9e5fa6..051391f12cc6d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx @@ -16,12 +16,14 @@ #include "GPUReconstruction.h" #include "GPUMemorySizeScalers.h" #include "GPUHostDataTypes.h" +#include "GPUSettings.h" +#include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/ZeroSuppression.h" #include "DataFormatsTPC/Digit.h" -#include "ChargePos.h" -#include "Array2D.h" +#include "CfChargePos.h" +#include "CfArray2D.h" using namespace o2::gpu; using namespace o2::tpc; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index 3e9ea2c6f608b..96efe08be6dc6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -46,7 +46,7 @@ namespace o2::gpu struct GPUTPCClusterMCInterimArray; struct TPCPadGainCalib; -struct ChargePos; +struct CfChargePos; class GPUTPCClusterFinder : public GPUProcessor { @@ -98,9 +98,9 @@ class GPUTPCClusterFinder : public GPUProcessor MinMaxCN* mMinMaxCN = nullptr; uint8_t* mPpadIsNoisy = nullptr; tpc::Digit* mPdigits = nullptr; // input digits, only set if ZS is skipped - ChargePos* mPpositions = nullptr; - ChargePos* mPpeakPositions = nullptr; - ChargePos* mPfilteredPeakPositions = nullptr; + CfChargePos* mPpositions = nullptr; + CfChargePos* mPpeakPositions = nullptr; + CfChargePos* mPfilteredPeakPositions = nullptr; uint8_t* mPisPeak = nullptr; uint32_t* mPclusterPosInRow = nullptr; // store the index where the corresponding cluster is stored in a bucket. // Required when MC are enabled to write the mc data to the correct position. 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index a9fbc1b5f40e0..da30375149b7c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -14,8 +14,10 @@ #include "GPUTPCClusterFinder.h" #include "GPUReconstruction.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "DataFormatsTPC/Digit.h" +#include "DataFormatsTPC/ClusterNative.h" +#include "GPUSettings.h" using namespace o2::gpu; using namespace o2::gpu::tpccf; @@ -37,7 +39,7 @@ void GPUTPCClusterFinder::DumpDigits(std::ostream& out) void GPUTPCClusterFinder::DumpChargeMap(std::ostream& out, std::string_view title) { out << "\nClusterer - " << title << " - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; - Array2D map(mPchargeMap); + CfArray2D map(mPchargeMap); out << std::hex; @@ -71,7 +73,7 @@ void GPUTPCClusterFinder::DumpPeakMap(std::ostream& out, std::string_view title) { out << "\nClusterer - " << title << " - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; - Array2D map(mPpeakMap); + CfArray2D map(mPpeakMap); out << std::hex; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx index fe3202fe7b439..092af2ea393c5 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx @@ -15,6 +15,7 @@ #include "GPUReconstruction.h" #include "ML/3rdparty/GPUORTFloat16.h" #include "GPUTPCNNClusterizer.h" +#include "GPUSettings.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h index da490b0f94d58..022642f9f142e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h 
@@ -15,7 +15,7 @@ #ifndef O2_GPUTPCNNCLUSTERIZER_H #define O2_GPUTPCNNCLUSTERIZER_H -#include "ChargePos.h" +#include "CfChargePos.h" #include "GPUProcessor.h" namespace o2::OrtDataType diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 2cf9ab2037007..512bc1d3bb09b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -43,7 +43,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CPU_ONLY(MCLabelAccumulator labelAcc(clusterer)); tpc::ClusterNative* clusterOut = (withMC) ? nullptr : clusterer.mPclusterByRow; o2::gpu::GPUTPCCFClusterizer::GPUSharedMemory smem_new; @@ -58,9 +58,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); - ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); + CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); @@ -75,7 +75,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); - ChargePos peak = clusterer.mPfilteredPeakPositions[base_idx + batchStart]; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); + CfChargePos peak = 
clusterer.mPfilteredPeakPositions[base_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()); if (clustererNN.nnClusterizerAddIndexData && transient_index == (clustererNN.nnClusterizerElementSize - 1)) { uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; - ChargePos tmp_pos = peak.delta(d); + CfChargePos tmp_pos = peak.delta(d); clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; } @@ -161,7 +161,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(chargeMap[peak].unpack()); int t = (rest_1 % (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - clustererNN.nnClusterizerSizeInputTime; - ChargePos tmp_pos(row + r, pad + p, time + t); + CfChargePos tmp_pos(row + r, pad + p, time + t); if (dtype == 0) { clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); } else if (dtype == 1) { @@ -227,8 +227,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; float central_charge = static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); @@ -322,8 +322,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; float central_charge = 
static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index 27cfba2487aed..dc7f537c6c1e8 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" #include "GPUTPCNNClusterizer.h" @@ -47,7 +47,7 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer); struct GPUSharedMemory { // Regular cluster finder - ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_BUILD_N]; uint8_t innerAboveThreshold[SCRATCH_PAD_WORK_GROUP_SIZE]; }; diff --git a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx index 19ef7aa9ecd0d..e58edae208115 100644 --- a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx @@ -26,7 +26,7 @@ MCLabelAccumulator::MCLabelAccumulator(GPUTPCClusterFinder& clusterer) { } -void MCLabelAccumulator::collect(const ChargePos& pos, Charge q) +void MCLabelAccumulator::collect(const CfChargePos& pos, Charge q) { if (q == 0 || !engaged()) { return; diff --git a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h index 176fbea02befe..35c24bfeb5f18 100644 --- a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h @@ -16,7 +16,7 @@ #define 
O2_GPU_MC_LABEL_ACCUMULATOR_H #include "clusterFinderDefs.h" -#include "Array2D.h" +#include "CfArray2D.h" #include #include @@ -44,14 +44,14 @@ class MCLabelAccumulator public: MCLabelAccumulator(GPUTPCClusterFinder&); - void collect(const ChargePos&, tpccf::Charge); + void collect(const CfChargePos&, tpccf::Charge); bool engaged() const { return mLabels != nullptr && mOutput != nullptr; } void commit(tpccf::Row, uint32_t, uint32_t); private: - Array2D mIndexMap; + CfArray2D mIndexMap; const o2::dataformats::ConstMCLabelContainerView* mLabels = nullptr; GPUTPCClusterMCInterimArray* mOutput = nullptr; diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx index 3d6b45c372ea0..899149d320bda 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx @@ -17,6 +17,7 @@ #include "GPUTPCClusterData.h" #include "GPUReconstruction.h" #include "GPUO2DataTypes.h" +#include "GPUParam.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index c633f10adae38..2f754d2416bc1 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -22,6 +22,7 @@ #include "GPUTRDTrackerDebug.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" +#include "GPUConstantMem.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index c2b74489e6250..5b0960919da15 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUDisplay.h" +#include "frontend/GPUDisplayInfo.inc" #include "GPUTPCDef.h" @@ -204,7 +205,7 @@ int32_t GPUDisplay::DrawGLScene() int32_t retVal = 0; if (mChain) { mIOPtrs = &mChain->mIOPtrs; - mCalib = &mChain->calib(); + mCalib = &mChain->GetCalib(); } if (!mIOPtrs) { mNCollissions = 0; 
diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index bb270cda23565..dbd90020698b2 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -15,12 +15,10 @@ #ifndef GPUDISPLAY_H #define GPUDISPLAY_H -#include "GPUSettings.h" #include "frontend/GPUDisplayFrontend.h" #include "backend/GPUDisplayBackend.h" #include "GPUDisplayInterface.h" -#include "GPUChainTracking.h" #include "../utils/vecpod.h" #include "../utils/qsem.h" @@ -34,6 +32,7 @@ namespace o2::gpu class GPUTPCTracker; struct GPUParam; class GPUQA; +class GPUTRDGeometry; class GPUDisplay : public GPUDisplayInterface { @@ -77,7 +76,7 @@ class GPUDisplay : public GPUDisplayInterface int32_t& drawTextFontSize() { return mDrawTextFontSize; } private: - static constexpr int32_t NSECTORS = GPUChainTracking::NSECTORS; + static constexpr int32_t NSECTORS = GPUCA_NSECTORS; static constexpr float GL_SCALE_FACTOR = (1.f / 100.f); static constexpr const int32_t N_POINTS_TYPE = 15; @@ -157,16 +156,7 @@ class GPUDisplay : public GPUDisplayInterface void insertVertexList(std::pair*, vecpod*>& vBuf, size_t first, size_t last); void insertVertexList(int32_t iSector, size_t first, size_t last); template - void SetInfo(Args... args) - { -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat-security" -#pragma GCC diagnostic ignored "-Wformat-truncation" - snprintf(mInfoText2, 1024, args...); -#pragma GCC diagnostic pop - GPUInfo("%s", mInfoText2); - mInfoText2Timer.ResetStart(); - } + void SetInfo(Args... 
args); void PrintGLHelpText(float colorValue); void calcXYZ(const float*); void mAnimationCloseAngle(float& newangle, float lastAngle); diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx index ded8803801fb7..98d2593c27950 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx @@ -34,6 +34,7 @@ #endif #include "GPUDisplay.h" +#include "GPULogging.h" #include using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx index 3ee3384c8e118..25ae5e1f8055f 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx @@ -27,6 +27,8 @@ #include "GPUDisplayBackendOpenGL.h" #include "shaders/GPUDisplayShaders.h" #include "GPUDisplay.h" +#include "GPULogging.h" +#include "GPUParam.h" #define OPENGL_EMULATE_MULTI_DRAW 0 diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx index 2324c194d04b9..93c19356ac062 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx @@ -19,6 +19,8 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE #include "GPUCommonDef.h" #include "GPUDisplayBackendVulkan.h" #include "GPUDisplay.h" +#include "GPULogging.h" +#include "GPUParam.h" #include diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx index 590d8648eb5bb..22970c3228815 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx @@ -30,6 +30,9 @@ #include "GPUDisplayFrontendWayland.h" #endif +#include "GPULogging.h" +#include + #ifdef GPUCA_BUILD_EVENT_DISPLAY_QT 
#include "GPUDisplayGUIWrapper.h" #else diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx index ad3b620ba8f55..d0aae2ffaad02 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx @@ -18,6 +18,7 @@ #include "GPUDisplayGUIWrapper.h" #include "GPUDisplay.h" #include "GPULogging.h" +#include "GPUParam.h" #include #include #include diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayInfo.inc b/GPU/GPUTracking/display/frontend/GPUDisplayInfo.inc new file mode 100644 index 0000000000000..b6ac78b31f315 --- /dev/null +++ b/GPU/GPUTracking/display/frontend/GPUDisplayInfo.inc @@ -0,0 +1,36 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDisplayInfo.inc +/// \author David Rohr + +#ifndef GPUDISPLAYINFO_INC_H +#define GPUDISPLAYINFO_INC_H + +#include "GPUDisplay.h" +#include "GPULogging.h" + +namespace o2::gpu +{ +template +void GPUDisplay::SetInfo(Args... 
args) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-security" +#pragma GCC diagnostic ignored "-Wformat-truncation" + snprintf(mInfoText2, 1024, args...); +#pragma GCC diagnostic pop + GPUInfo("%s", mInfoText2); + mInfoText2Timer.ResetStart(); +} +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index 32ff6c73e110c..54258857a244c 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUDisplay.h" +#include "frontend/GPUDisplayInfo.inc" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx index 6c0595b073cd0..cc9ec2e766c4d 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx @@ -13,6 +13,8 @@ /// \author David Rohr #include "GPUDisplay.h" +#include "frontend/GPUDisplayInfo.inc" +#include "GPUCommonMath.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx index b04c93ab8496e..ef94628baeb38 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx @@ -31,6 +31,7 @@ #include "backend/GPUDisplayBackendOpenGL.h" #include "shaders/GPUDisplayShaders.h" #include "GPUDisplay.h" +#include "GPULogging.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx index ca9fd6be01703..9d188d03c7b69 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx @@ -13,6 
+13,9 @@ /// \author David Rohr #include "GPUDisplay.h" +#include "GPUChainTracking.h" +#include "GPULogging.h" +#include "GPUParam.h" #ifndef _WIN32 #include "bitmapfile.h" diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx index 644995929acb7..4dacaec2fbca5 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx @@ -14,6 +14,7 @@ #include #include "GPUDisplay.h" +#include "GPULogging.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx index ee50f32e3c1ac..d31ee206f35e2 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx @@ -16,6 +16,7 @@ #include "frontend/GPUDisplayFrontend.h" #include "GPUDisplayInterface.h" +#include #include #include diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx index dbeefc7bf9b07..07a05e585d422 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx @@ -17,6 +17,9 @@ #endif #include "GPUDisplay.h" +#include "GPULogging.h" +#include "GPUConstantMem.h" +#include "GPUChainTracking.h" using namespace o2::gpu; #ifndef GPUCA_NO_ROOT diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 24668c576d795..e1d63ea1a21e4 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -28,6 +28,7 @@ #include "GPUTPCMCInfo.h" #include "GPUParam.inc" #include "GPUCommonMath.h" +#include "GPUChainTracking.h" #include @@ -43,7 +44,7 @@ using namespace o2::gpu; #define GET_CID(sector, i) (mParam->par.earlyTpcTransform ? 
mIOPtrs->clusterData[sector][i].id : (mIOPtrs->clustersNative->clusterOffset[sector][0] + i)) const GPUTRDGeometry* GPUDisplay::trdGeometry() { return (GPUTRDGeometry*)mCalib->trdGeometry; } -const GPUTPCTracker& GPUDisplay::sectorTracker(int32_t iSector) { return mChain->GetTPCSectorTrackers()[iSector]; } +const GPUTPCTracker& GPUDisplay::sectorTracker(int32_t iSector) { return mChain->GetProcessors()->tpcTrackers[iSector]; } inline void GPUDisplay::insertVertexList(std::pair*, vecpod*>& vBuf, size_t first, size_t last) { diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index 0a780732273db..ab4c0abd7b60e 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -17,6 +17,7 @@ #endif #include "GPUDisplay.h" +#include "frontend/GPUDisplayInfo.inc" #include "GPUO2DataTypes.h" #include "GPUTPCClusterData.h" #include "GPUTPCConvertImpl.h" diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 7e3ddf868af2a..202ea47d1f3bf 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -128,7 +128,7 @@ o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUS o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPCCLUSTERFINDER" LB int32_t iBuf "uint32_t" offset int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage ChargePos* in ChargePos* out) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage CfChargePos* in CfChargePos* out) o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) 
o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index ba7aeb3800a5e..6a2623fb6e09d 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -38,6 +38,7 @@ #include "GPUTPCDef.h" #include "GPUTPCTrackingData.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "GPUTPCTrack.h" #include "GPUTPCTracker.h" #include "GPUTPCGMMergedTrack.h" @@ -1702,7 +1703,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } - uint32_t nCl = clNative ? clNative->nClustersTotal : mTracking->GetTPCMerger().NMaxClusters(); + uint32_t nCl = clNative ? clNative->nClustersTotal : mTracking->GetProcessors()->tpcMerger.NMaxClusters(); mClusterCounts.nTotal += nCl; if (mQATasks & taskClusterCounts) { for (uint32_t i = 0; i < nCl; i++) { diff --git a/GPU/GPUTracking/qa/genEvents.cxx b/GPU/GPUTracking/qa/genEvents.cxx index 2e1bc1c5c64b2..9c2220f9ef748 100644 --- a/GPU/GPUTracking/qa/genEvents.cxx +++ b/GPU/GPUTracking/qa/genEvents.cxx @@ -37,6 +37,7 @@ #include "GPUTPCGMPropagator.h" #include "GPUTPCGMMerger.h" #include "GPUChainTracking.h" +#include "GPUConstantMem.h" #include "../utils/qconfig.h" @@ -169,7 +170,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) GPUTPCGMPropagator prop; { prop.SetToyMCEventsFlag(kTRUE); - const GPUTPCGMMerger& merger = mRec->GetTPCMerger(); + const GPUTPCGMMerger& merger = mRec->GetProcessors()->tpcMerger; prop.SetPolynomialField(&merger.Param().polynomialField); } From 73a093527f4973ed92d43e7a8c893cbd8909af63 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 20 Apr 2025 20:57:39 +0200 Subject: [PATCH 0270/1764] GPU: Move kernel specializations to dedicated file --- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 8 +- ...GPUReconstructionCUDAKernelsSpecialize.inc | 138 
++++++++++++++++++ GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- .../opencl/GPUReconstructionOCLKernels.cxx | 7 +- .../GPUReconstructionOCLKernelsSpecialize.inc | 20 +++ GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 124 +--------------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 15 -- 7 files changed, 164 insertions(+), 150 deletions(-) create mode 100644 GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc create mode 100644 GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 758ab1b0e36c3..11a62bcec2318 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -22,16 +22,12 @@ using namespace o2::gpu; #include "GPUReconstructionIncludesDeviceAll.h" +#include "GPUReconstructionCUDAKernelsSpecialize.inc" + #if defined(__HIPCC__) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUConstantMemBuffer; } #endif -template <> -inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) -{ - GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); -} - template inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc new file mode 100644 index 0000000000000..899c2e240cd94 --- /dev/null +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -0,0 +1,138 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionCUDAKernelsSpecialize.inc +/// \author David Rohr + +#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) + +namespace o2::gpu::internal +{ +namespace // anonymous +{ +struct MergeBorderTracks_compMax { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); + } +}; +struct MergeBorderTracks_compMin { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); + } +}; + +struct GPUTPCGMMergerSortTracks_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + if (a.CCE() != b.CCE()) { + return a.CCE() > b.CCE(); + } + if (a.Legs() != b.Legs()) { + return a.Legs() > b.Legs(); + } + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE + return a.NClusters() > b.NClusters(); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerSortTracksQPt_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerMergeLoopers_comp { + GPUd() bool operator()(const 
MergeLooperParam& a, const MergeLooperParam& b) + { + return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); + } +}; + +struct GPUTPCGMO2OutputSort_comp { + GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) + { + return (a.y > b.y); + } +}; + +} // anonymous namespace +} // namespace o2::gpu::internal + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +{ + if (cmpMax) { + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); + } else { + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin()); + } +} + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); +} + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); +} + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); +} + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, 
mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); +} +#endif // GPUCA_SPECIALIZE_THRUST_SORTS + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +{ + GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); +} diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index d7adb222d547b..d29a6afb60899 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDAKernelsSpecialize.inc GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesSystem.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 
29b71017e9f73..72c68428149dd 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -15,12 +15,7 @@ #include "GPUReconstructionOCLIncludesHost.h" #include "GPUReconstructionKernelIncludes.h" -template <> -inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) -{ - cl_int4 val0 = {0, 0, 0, 0}; - GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); -} +#include "GPUReconstructionOCLKernelsSpecialize.inc" template inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc new file mode 100644 index 0000000000000..1b860e47a4243 --- /dev/null +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc @@ -0,0 +1,20 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUReconstructionOCLKernelsSpecialize.inc +/// \author David Rohr + +template <> +inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +{ + cl_int4 val0 = {0, 0, 0, 0}; + GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); +} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 4f960a8e1ec76..b6241ad36b5de 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -736,46 +736,15 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea #endif } -#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize MergeBorderTracks<3> -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct MergeBorderTracks_compMax { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); - } -}; -struct MergeBorderTracks_compMin { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); - } -}; -} // anonymous namespace -} // namespace o2::gpu::internal - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) -{ - if (cmpMax) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); - } else { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin()); - } -} -#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize MergeBorderTracks<3> - template <> GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS if (iThread == 0) { if (cmpMax) { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); }); } else { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); }); } } #endif @@ -1783,74 +1752,6 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr } } -#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct GPUTPCGMMergerSortTracks_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerSortTracksQPt_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { 
- return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - } -}; -} // anonymous namespace -} // namespace o2::gpu::internal - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); -} - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); -} -#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt - GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS @@ -2050,27 +1951,6 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, #endif } -#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct GPUTPCGMMergerMergeLoopers_comp { - GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) - { - return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); - } -}; -} // anonymous namespace -} // namespace o2::gpu::internal - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - 
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); -} -#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt - GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const MergeLooperParam* params = mLooperCandidates; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 6640b556c3011..ea219a02a1887 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -93,21 +93,6 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, #endif } -#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMO2Output::Thread -struct GPUTPCGMO2OutputSort_comp { - GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) - { - return (a.y > b.y); - } -}; - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); -} -#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread - template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { From 29e2526be7b62961793f538b1c00d7f7f1618ccb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 21 Apr 2025 10:30:29 +0200 Subject: [PATCH 0271/1764] GPU: Add protections not to do invalid memory allocations while volatile memory is allocated --- GPU/GPUTracking/Base/GPUMemoryResource.h | 34 +++++------ 
GPU/GPUTracking/Base/GPUReconstruction.cxx | 56 ++++++++++++------- GPU/GPUTracking/Base/GPUReconstruction.h | 18 +++--- GPU/GPUTracking/Global/GPUChainITS.cxx | 4 +- .../Global/GPUChainTrackingCompression.cxx | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 2 +- .../Global/GPUChainTrackingMerger.cxx | 2 +- 7 files changed, 69 insertions(+), 49 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.h b/GPU/GPUTracking/Base/GPUMemoryResource.h index 06e350db0bfc7..947bcac504733 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.h +++ b/GPU/GPUTracking/Base/GPUMemoryResource.h @@ -56,24 +56,24 @@ class GPUMemoryResource public: enum MemoryType { - MEMORY_HOST = 1, - MEMORY_GPU = 2, - MEMORY_INPUT_FLAG = 4, - MEMORY_INPUT = 7, - MEMORY_OUTPUT_FLAG = 8, - MEMORY_OUTPUT = 11, - MEMORY_INOUT = 15, - MEMORY_SCRATCH = 16, - MEMORY_SCRATCH_HOST = 17, - MEMORY_EXTERNAL = 32, - MEMORY_PERMANENT = 64, - MEMORY_CUSTOM = 128, - MEMORY_CUSTOM_TRANSFER = 256, - MEMORY_STACK = 512 + MEMORY_HOST = 1, // Memory allocated on host (irrespective of other flags) + MEMORY_GPU = 2, // Memory allocated on GPU (irrespective of other flags) + MEMORY_INPUT_FLAG = 4, // Flag to signal this memory is copied to GPU with TransferMemoryResourcesToGPU, and alike + MEMORY_INPUT = 7, // Input data for GPU has the MEMORY_INPUT_FLAG flag and is allocated on host and GPU + MEMORY_OUTPUT_FLAG = 8, // Flag to signal this memory is copied to Host with TransferMemoryResourcesToHost, and alike + MEMORY_OUTPUT = 11, // Output data for GPU has the MEMORY_OUTPUT_FLAG flag and is allocated on host and GPU + MEMORY_INOUT = 15, // Combination of MEMORY_INPUT and MEMORY_OUTPUT + MEMORY_SCRATCH = 16, // Scratch memory, is allocated only on GPU by default if running on GPU, only on host otherwise, if MEMORY_HOST and MEMORY_GPU flags not set. 
+ MEMORY_SCRATCH_HOST = 17, // Scratch memory only on host + MEMORY_EXTERNAL = 32, // Special flag to signal that memory on host shall not be allocated, but will be provided externally and manually + MEMORY_PERMANENT = 64, // Permanent memory, registered once with AllocateRegisteredPermanentMemory, not per time frame. Only for small sizes! + MEMORY_CUSTOM = 128, // Memory is not allocated automatically with AllocateRegisteredMemory(GPUProcessor), but must be allocated manually via AllocateRegisteredMemory(memoryId) + MEMORY_CUSTOM_TRANSFER = 256, // Memory is not transferred automatically with TransferMemoryResourcesTo, but must be transferred manually with TransferMemoryTo...(memoryId) + MEMORY_STACK = 512 // Use memory from non-persistent stack at the end of the global memory region. Not persistent for full TF. Use PushNonPersistentMemory and PopNonPersistentMemory to release memory from the stack }; - enum AllocationType { ALLOCATION_AUTO = 0, - ALLOCATION_INDIVIDUAL = 1, - ALLOCATION_GLOBAL = 2 }; + enum AllocationType { ALLOCATION_AUTO = 0, // --> GLOBAL if GPU is used, INDIVIDUAL otherwise + ALLOCATION_INDIVIDUAL = 1, // Individual memory allocations with malloc (host only) + ALLOCATION_GLOBAL = 2 }; // Allocate memory blocks from large preallocated memory range with internal allocator (host and GPU) GPUMemoryResource(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), MemoryType type, const char* name = "") : mProcessor(proc), mPtr(nullptr), mPtrDevice(nullptr), mSetPointers(setPtr), mName(name), mSize(0), mOverrideSize(0), mReuse(-1), mType(type) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index c79c743e96ce5..ab2210e5dd555 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -538,6 +538,10 @@ size_t GPUReconstruction::AllocateRegisteredPermanentMemory() if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Allocating Permanent Memory"); 
} + if (mVolatileMemoryStart) { + GPUError("Must not allocate permanent memory while volatile chunks are allocated"); + throw std::bad_alloc(); + } int32_t total = 0; for (uint32_t i = 0; i < mMemoryResources.size(); i++) { if ((mMemoryResources[i].mType & GPUMemoryResource::MEMORY_PERMANENT) && mMemoryResources[i].mPtr == nullptr) { @@ -669,6 +673,10 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUError("Device Processor not set (%s)", res->mName); throw std::bad_alloc(); } + if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && !(res->mType & GPUMemoryResource::MEMORY_STACK)) { + GPUError("Must not allocate non-stacked device memory while volatile chunks are allocated"); + throw std::bad_alloc(); + } size_t size = AllocateRegisteredMemoryHelper(res, res->mPtrDevice, recPool->mDeviceMemoryPool, recPool->mDeviceMemoryBase, recPool->mDeviceMemorySize, &GPUMemoryResource::SetDevicePointers, recPool->mDeviceMemoryPoolEnd, " gpu"); if (!(res->mType & GPUMemoryResource::MEMORY_HOST) || (res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { @@ -702,7 +710,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(int16_t ires, GPUOutputContro return res->mReuse >= 0 ? 
0 : res->mSize; } -void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type) +void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type) { if (type != GPUMemoryResource::MEMORY_HOST && (!IsGPU() || type != GPUMemoryResource::MEMORY_GPU)) { throw std::runtime_error("Requested invalid memory typo for unmanaged allocation"); @@ -711,6 +719,10 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type) mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]); return GPUProcessor::alignPointer(mUnmanagedChunks.back().get()); } else { + if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) { + GPUError("Must not allocate direct memory while volatile chunks are allocated"); + throw std::bad_alloc(); + } void*& pool = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPool : mHostMemoryPool; void*& poolend = type == GPUMemoryResource::MEMORY_GPU ? 
mDeviceMemoryPoolEnd : mHostMemoryPoolEnd; char* retVal; @@ -745,7 +757,6 @@ void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size) if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (volatile GPU): " << size << " - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n"; } - return retVal; } @@ -758,6 +769,30 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device) return GPUProcessor::alignPointer(mVolatileChunks.back().get()); } +void GPUReconstruction::MakeFutureDeviceMemoryAllocationsVolatile() +{ + mDeviceMemoryAsVolatile = true; + AllocateVolatileDeviceMemory(0); +} + +void GPUReconstruction::ReturnVolatileDeviceMemory() +{ + mDeviceMemoryAsVolatile = false; + if (mVolatileMemoryStart) { + mDeviceMemoryPool = mVolatileMemoryStart; + mVolatileMemoryStart = nullptr; + } + if (GetProcessingSettings().allocDebugLevel >= 2) { + std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n"; + } +} + +void GPUReconstruction::ReturnVolatileMemory() +{ + ReturnVolatileDeviceMemory(); + mVolatileChunks.clear(); +} + void GPUReconstruction::ResetRegisteredMemoryPointers(GPUProcessor* proc) { for (uint32_t i = 0; i < mMemoryResources.size(); i++) { @@ -814,23 +849,6 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res) res->mPtrDevice = nullptr; } -void GPUReconstruction::ReturnVolatileDeviceMemory() -{ - if (mVolatileMemoryStart) { - mDeviceMemoryPool = mVolatileMemoryStart; - mVolatileMemoryStart = nullptr; - } - if (GetProcessingSettings().allocDebugLevel >= 2) { - std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n"; - } -} - -void GPUReconstruction::ReturnVolatileMemory() -{ - ReturnVolatileDeviceMemory(); - mVolatileChunks.clear(); -} - void GPUReconstruction::PushNonPersistentMemory(uint64_t tag) { mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, 
mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), tag); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index b6256f7f8ad82..396a007761fb7 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -166,9 +166,10 @@ class GPUReconstruction size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr); void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction* rec, GPUOutputControl* control = nullptr); - void* AllocateUnmanagedMemory(size_t size, int32_t type); + void* AllocateDirectMemory(size_t size, int32_t type); void* AllocateVolatileDeviceMemory(size_t size); void* AllocateVolatileMemory(size_t size, bool device); + void MakeFutureDeviceMemoryAllocationsVolatile(); void FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom = false, bool freePermanent = false); void FreeRegisteredMemory(int16_t res); void ClearAllocatedMemory(bool clearOutputs = true); @@ -326,14 +327,15 @@ class GPUReconstruction void* mHostMemoryPoolBlocked = nullptr; // Ptr to end of pool size_t mHostMemorySize = 0; // Size of host memory buffer size_t mHostMemoryUsedMax = 0; // Maximum host memory size used over time - void* mDeviceMemoryBase = nullptr; // - void* mDeviceMemoryPermanent = nullptr; // - void* mDeviceMemoryPool = nullptr; // - void* mDeviceMemoryPoolEnd = nullptr; // - void* mDeviceMemoryPoolBlocked = nullptr; // - size_t mDeviceMemorySize = 0; // + void* mDeviceMemoryBase = nullptr; // Same for device ... + void* mDeviceMemoryPermanent = nullptr; // ... + void* mDeviceMemoryPool = nullptr; // ... + void* mDeviceMemoryPoolEnd = nullptr; // ... + void* mDeviceMemoryPoolBlocked = nullptr; // ... + size_t mDeviceMemorySize = 0; // ... + size_t mDeviceMemoryUsedMax = 0; // ... 
void* mVolatileMemoryStart = nullptr; // Ptr to beginning of temporary volatile memory allocation, nullptr if uninitialized - size_t mDeviceMemoryUsedMax = 0; // + bool mDeviceMemoryAsVolatile = false; // Make device memory allocations volatile std::unordered_set mRegisteredMemoryPtrs; // List of pointers registered for GPU diff --git a/GPU/GPUTracking/Global/GPUChainITS.cxx b/GPU/GPUTracking/Global/GPUChainITS.cxx index eeead79b1840b..5d36dc63ca85d 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.cxx +++ b/GPU/GPUTracking/Global/GPUChainITS.cxx @@ -28,7 +28,7 @@ class GPUFrameworkExternalAllocator final : public o2::its::ExternalAllocator public: void* allocate(size_t size) override { - return mFWReco->AllocateUnmanagedMemory(size, GPUMemoryResource::MEMORY_GPU); + return mFWReco->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU); } void setReconstructionFramework(o2::gpu::GPUReconstruction* fwr) { mFWReco = fwr; } @@ -86,7 +86,7 @@ o2::its::TimeFrame* GPUChainITS::GetITSTimeframe() } #if !defined(GPUCA_STANDALONE) if (mITSTimeFrame->mIsGPU) { - auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateUnmanagedMemory(size, GPUMemoryResource::MEMORY_GPU); }; + auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU); }; mFrameworkAllocator.reset(new o2::its::GPUFrameworkExternalAllocator); mFrameworkAllocator->setReconstructionFramework(rec()); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index fc07a91004c5f..24c74a661f18e 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -43,7 +43,7 @@ int32_t GPUChainTracking::RunTPCCompression() } if (gatherMode == 3) { - mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile + mRec->MakeFutureDeviceMemoryAllocationsVolatile(); } 
SetupGPUProcessor(&Compressor, true); new (Compressor.mMemory) GPUTPCCompression::memory; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 5d05cd6a97776..53bdfbadd4b25 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -34,7 +34,7 @@ static inline uint32_t RGB(uint8_t r, uint8_t g, uint8_t b) { return (uint32_t)r int32_t GPUChainTracking::PrepareProfile() { #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE - char* tmpMem = (char*)mRec->AllocateUnmanagedMemory(PROFILE_MAX_SIZE, GPUMemoryResource::MEMORY_GPU); + char* tmpMem = (char*)mRec->AllocateDirectMemory(PROFILE_MAX_SIZE, GPUMemoryResource::MEMORY_GPU); processorsShadow()->tpcTrackers[0].mStageAtSync = tmpMem; runKernel({{BlockCount(), ThreadCount(), -1}}, tmpMem, PROFILE_MAX_SIZE); #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 163f08634ef86..84835a1695071 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -297,7 +297,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) SynchronizeEventAndRelease(mEvents->single, doGPU); if (GetProcessingSettings().clearO2OutputFromGPU) { - mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile + mRec->MakeFutureDeviceMemoryAllocationsVolatile(); } AllocateRegisteredMemory(Merger.MemoryResOutputO2(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracksO2)]); AllocateRegisteredMemory(Merger.MemoryResOutputO2Clus(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracksO2ClusRefs)]); From 52c23287f6abc479a29f9aa2fc1acebf64e58f29 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 20 Apr 2025 23:01:33 +0200 Subject: [PATCH 0272/1764] GPU: Get rid 
of backendInternal additional wrapper --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 18 +----- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 5 +- .../GPUReconstructionProcessingKernels.inc | 9 ++- .../Base/cuda/GPUReconstructionCUDA.cu | 3 +- .../Base/cuda/GPUReconstructionCUDA.h | 7 +-- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 60 +++++++++---------- ...GPUReconstructionCUDAKernelsSpecialize.inc | 12 ++-- .../Base/opencl/GPUReconstructionOCL.cxx | 2 +- .../Base/opencl/GPUReconstructionOCL.h | 4 +- .../opencl/GPUReconstructionOCLKernels.cxx | 14 +---- .../GPUReconstructionOCLKernelsSpecialize.inc | 2 +- 11 files changed, 59 insertions(+), 77 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 2453ce4a2328f..5f80a56e9e64e 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -54,7 +54,7 @@ GPUReconstructionCPU::~GPUReconstructionCPU() } template -inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCPU::runKernelBackend(const krnlSetupTime& _xyz, const Args&... 
args) { auto& x = _xyz.x; auto& y = _xyz.y; @@ -88,7 +88,7 @@ inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& } template <> -inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionCPU::runKernelBackend(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { int32_t nThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); if (nThreads > 1) { @@ -108,17 +108,6 @@ inline void GPUReconstructionCPU::runKernelBackendInternal(con } } -template -void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs& args) -{ -#pragma GCC diagnostic push -#if defined(__clang__) -#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not alway captured below -#endif - std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); -#pragma GCC diagnostic push -} - template GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu) { @@ -137,8 +126,7 @@ GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelPrope return ret; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ - template void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs& args); \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) 
\ template GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index d0d8b05c4af0e..d93d1335d45c5 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -40,7 +40,7 @@ class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface template krnlProperties getKernelProperties(int gpu = -1); template - void runKernelBackend(const krnlSetupArgs& args); + void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args); virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false); int32_t GPUStuck() { return mGPUStuck; } @@ -59,9 +59,6 @@ class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface GPUReconstructionCPU(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface(cfg) {} - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); - int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override { return 0; } int32_t unregisterMemoryForGPU_internal(const void* ptr) override { return 0; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc b/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc index 49d02515372b8..b303cb7c8d39c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc @@ -21,6 +21,11 @@ namespace o2::gpu { +#pragma GCC diagnostic push +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not alway captured below +#endif + template void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const int num, const void* args) { @@ -28,7 +33,7 @@ void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ case x_num: { \ const auto& args2 = *(const krnlSetupArgs*)args; \ - ((T*)this)->template runKernelBackend(args2); \ + std::apply([this, &args2](auto&... 
vals) { ((T*)this)->template runKernelBackend(args2.s, vals...); }, args2.v); \ break; \ } #include "GPUReconstructionKernelList.h" @@ -36,6 +41,8 @@ void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const } // clang-format on } +#pragma GCC diagnostic push + } // namespace o2::gpu #endif // GPURECONSTRUCTIONPROCESSINGKERNELS_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 970b331ea99fb..c40c607396f3f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -24,7 +24,8 @@ #include "GPUParamRTC.h" #include "GPUReconstructionCUDAHelpers.inc" #include "GPUDefParametersLoad.inc" -#include "GPUReconstructionProcessingKernels.inc" +#include "GPUReconstructionKernelIncludes.h" +#include "GPUConstantMem.h" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index ed75100dfe351..36dcdffb1c6d6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -45,7 +45,9 @@ class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterfac virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; template - void runKernelBackend(const krnlSetupArgs& args); + void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args); + template + void runKernelBackendTimed(const krnlSetupTime& _xyz, const Args&... 
args); template friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); @@ -53,9 +55,6 @@ class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterfac protected: GPUReconstructionCUDAInternals* mInternals; - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); - int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 11a62bcec2318..3267e1d5c67f6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -23,13 +23,15 @@ using namespace o2::gpu; #include "GPUReconstructionIncludesDeviceAll.h" #include "GPUReconstructionCUDAKernelsSpecialize.inc" +#include "GPUReconstructionProcessingKernels.inc" +template void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const int num, const void* args); #if defined(__HIPCC__) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUConstantMemBuffer; } #endif template -inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCUDA::runKernelBackendTimed(const krnlSetupTime& _xyz, const Args&... args) { #if !defined(GPUCA_KERNEL_COMPILE_MODE) || GPUCA_KERNEL_COMPILE_MODE != 1 if (!GetProcessingSettings().rtc.enable) { @@ -52,18 +54,18 @@ inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& } template -void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args) +inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, const Args&... 
args) { - auto& x = args.s.x; - auto& z = args.s.z; + auto& x = _xyz.x; + auto& z = _xyz.z; if (z.evList) { for (int32_t k = 0; k < z.nEvents; k++) { GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); } } { - GPUDebugTiming timer(GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); - std::apply([this, &args](auto&... vals) { this->runKernelBackendInternal(args.s, vals...); }, args.v); + GPUDebugTiming timer(GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, _xyz, this); + runKernelBackendTimed(_xyz, args...); } GPUChkErr(cudaGetLastError()); if (z.ev) { @@ -74,31 +76,29 @@ void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& #undef GPUCA_KRNL_REG #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) -#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 // ---------- COMPILE_MODE = perkernel ---------- -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args); -#else // ---------- COMPILE_MODE = onefile | rdc ---------- -#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 -#define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc -#endif - -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ - GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \ - template void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args); - -#ifndef __HIPCC__ // CUDA version -#define GPUCA_KRNL_CALL(x_class, ...) 
\ - GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...); -#else // HIP version -#undef GPUCA_KRNL_CUSTOM -#define GPUCA_KRNL_CUSTOM(args) GPUCA_M_STRIP(args) -#define GPUCA_KRNL_CALL(x_class, ...) \ - hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...); -#endif // __HIPCC__ - +// clang-format off +#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE != 1 // ---------- COMPILE_MODE = perkernel ---------- + #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 + #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc + #endif + + #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ + GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) + + #ifndef __HIPCC__ // CUDA version + #define GPUCA_KRNL_CALL(x_class, ...) \ + GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...); + #else // HIP version + #undef GPUCA_KRNL_CUSTOM + #define GPUCA_KRNL_CUSTOM(args) GPUCA_M_STRIP(args) + #define GPUCA_KRNL_CALL(x_class, ...) 
\ + hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...); + #endif // __HIPCC__ + + #include "GPUReconstructionKernelList.h" + #undef GPUCA_KRNL #endif // ---------- COMPILE_MODE = onefile | rdc ---------- - -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL +// clang-format on #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 899c2e240cd94..8796f063abdc5 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -97,7 +97,7 @@ struct GPUTPCGMO2OutputSort_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { if (cmpMax) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); @@ -107,32 +107,32 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInter } template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), 
GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 28c809dd4a09a..949dd6195b262 100644 --- 
a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -13,8 +13,8 @@ /// \author David Rohr #include "GPUReconstructionOCLIncludesHost.h" -#include "GPUReconstructionProcessingKernels.inc" #include "GPUDefParametersLoad.inc" +#include "GPUConstantMem.h" #include diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 091bc0409630d..958d5186bf41a 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -34,7 +34,7 @@ class GPUReconstructionOCL : public GPUReconstructionProcessing::KernelInterface ~GPUReconstructionOCL() override; template - void runKernelBackend(const krnlSetupArgs& args); + void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args); protected: int32_t InitDevice_Runtime() override; @@ -57,8 +57,6 @@ class GPUReconstructionOCL : public GPUReconstructionProcessing::KernelInterface template int32_t AddKernel(); - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); GPUReconstructionOCLInternals* mInternals; float mOclVersion; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 72c68428149dd..655df5404276b 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -16,9 +16,11 @@ #include "GPUReconstructionKernelIncludes.h" #include "GPUReconstructionOCLKernelsSpecialize.inc" +#include "GPUReconstructionProcessingKernels.inc" +template void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const int num, const void* args); template -inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionOCL::runKernelBackend(const krnlSetupTime& _xyz, const Args&... 
args) { cl_kernel k = getKernelObject(); auto& x = _xyz.x; @@ -48,12 +50,6 @@ inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& } } -template -void GPUReconstructionOCL::runKernelBackend(const krnlSetupArgs& args) -{ - std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); -} - template int32_t GPUReconstructionOCL::AddKernel() { @@ -86,7 +82,3 @@ int32_t GPUReconstructionOCL::AddKernels() #undef GPUCA_KRNL return 0; } - -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionOCL::runKernelBackend(const krnlSetupArgs& args); -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc index 1b860e47a4243..d5b0338aecbd9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc @@ -13,7 +13,7 @@ /// \author David Rohr template <> -inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionOCL::runKernelBackend(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { cl_int4 val0 = {0, 0, 0, 0}; GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 
0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); From 255597cf52515da16637a9a37bb3b70de1806d80 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 20 Apr 2025 23:47:46 +0200 Subject: [PATCH 0273/1764] GPU: Fix onefile compile mode after all the refactoring --- .../Base/GPUReconstructionKernelMacros.h | 14 -------------- .../Base/cuda/GPUReconstructionCUDA.h | 2 ++ .../Base/cuda/GPUReconstructionCUDAKernels.cu | 17 +++++++++++++---- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index 2b16dfb32fe14..a03d9de13ef8f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -62,20 +62,6 @@ } #endif -// GPU Host wrappers for kernel -#define GPUCA_KRNL_HOST(x_class, ...) \ - GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ - template <> class GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::backendInternal { \ - public: \ - template \ - static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ - { \ - auto& x = _xyz.x; \ - auto& y = _xyz.y; \ - GPUCA_KRNL_CALL(x_class, __VA_ARGS__) \ - } \ - }; - #endif // GPUCA_GPUCODE #define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 36dcdffb1c6d6..6c126d153d8ae 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -48,6 +48,8 @@ class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterfac void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args); template void runKernelBackendTimed(const krnlSetupTime& _xyz, const Args&... 
args); + template + struct kernelBackendMacro; template friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 3267e1d5c67f6..e6ed94bba2cec 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -35,7 +35,7 @@ inline void GPUReconstructionCUDA::runKernelBackendTimed(const krnlSetupTime& _x { #if !defined(GPUCA_KERNEL_COMPILE_MODE) || GPUCA_KERNEL_COMPILE_MODE != 1 if (!GetProcessingSettings().rtc.enable) { - backendInternal::runKernelBackendMacro(_xyz, this, args...); + kernelBackendMacro::run(_xyz, this, args...); } else #endif { @@ -82,9 +82,6 @@ inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, c #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc #endif - #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ - GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) - #ifndef __HIPCC__ // CUDA version #define GPUCA_KRNL_CALL(x_class, ...) \ GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...); @@ -95,6 +92,18 @@ inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, c hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...); #endif // __HIPCC__ + #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) 
\ + GPUCA_KRNLGPU(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \ + template <> struct GPUReconstructionCUDA::kernelBackendMacro { \ + template \ + static inline void run(const GPUReconstructionProcessing::krnlSetupTime& _xyz, auto* me, const Args&... args) \ + { \ + auto& x = _xyz.x; \ + auto& y = _xyz.y; \ + GPUCA_KRNL_CALL(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \ + } \ + }; + #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL #endif // ---------- COMPILE_MODE = onefile | rdc ---------- From 062fbe612c19a9bb23d77fff43c42d74d7e54b6e Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 22 Apr 2025 10:33:57 +0200 Subject: [PATCH 0274/1764] ITS: fix integer comparison warning in NoiseMap --- .../ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataFormats/Detectors/ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h b/DataFormats/Detectors/ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h index 49e6f531eeb76..25b7f451b6452 100644 --- a/DataFormats/Detectors/ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h +++ b/DataFormats/Detectors/ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h @@ -205,7 +205,7 @@ class NoiseMap NoiseMap merge(const NoiseMap* prev) { int incre = 0; - for (size_t i = 0; i < (int)mNoisyPixels.size(); ++i) { + for (size_t i = 0; i < mNoisyPixels.size(); ++i) { for (const auto& prev_np : prev->mNoisyPixels[i]) { // only enters this for loop if the "i" chip exists. 
if (mNoisyPixels[i].find(prev_np.first) == mNoisyPixels[i].end()) { mNoisyPixels[i][prev_np.first] = prev_np.second; From b05a704889e0d5004c8029f25161af4112d27a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?BiaoZhang=20=28=E5=BC=A0=E5=BD=AA=29?= <52267892+zhangbiao-phy@users.noreply.github.com> Date: Tue, 22 Apr 2025 11:36:55 +0200 Subject: [PATCH 0275/1764] Add PDG code and mass of Bc meson (#14175) * Add PDG code and mass of Bc * Update make_pdg_header.py with Bc meson * Update PhysicsConstants.h * Update PhysicsConstants.h --- Common/Constants/include/CommonConstants/PhysicsConstants.h | 2 ++ Common/Constants/include/CommonConstants/make_pdg_header.py | 1 + 2 files changed, 3 insertions(+) diff --git a/Common/Constants/include/CommonConstants/PhysicsConstants.h b/Common/Constants/include/CommonConstants/PhysicsConstants.h index c6fa3cddcdad5..f3b983b966faf 100644 --- a/Common/Constants/include/CommonConstants/PhysicsConstants.h +++ b/Common/Constants/include/CommonConstants/PhysicsConstants.h @@ -34,6 +34,7 @@ enum Pdg { kB0 = 511, kB0Bar = -511, kBPlus = 521, + kBCPlus = 541, kBS = 531, kBSBar = -531, kD0 = 421, @@ -84,6 +85,7 @@ enum Pdg { constexpr double MassB0 = 5.27966; constexpr double MassB0Bar = 5.27966; constexpr double MassBPlus = 5.27934; +constexpr double MassBCPlus = 6.27447; constexpr double MassBS = 5.36692; constexpr double MassBSBar = 5.36692; constexpr double MassD0 = 1.86484; diff --git a/Common/Constants/include/CommonConstants/make_pdg_header.py b/Common/Constants/include/CommonConstants/make_pdg_header.py index e4f92e6e8b62d..89e3f0e35070d 100755 --- a/Common/Constants/include/CommonConstants/make_pdg_header.py +++ b/Common/Constants/include/CommonConstants/make_pdg_header.py @@ -89,6 +89,7 @@ class Pdg(Enum): kB0 = 511 kB0Bar = -511 kBPlus = 521 + kBCPlus = 541 kBS = 531 kBSBar = -531 kD0 = 421 From 3412dff968bad3565c97df879a5974c6366610ca Mon Sep 17 00:00:00 2001 From: Mattia Faggin Date: Tue, 22 Apr 2025 11:37:16 +0200 Subject: [PATCH 
0276/1764] Add SigmaC(2520). (#14187) * Add SigmaC(2520). * Remove comments * Update python script. --------- Co-authored-by: Mattia Faggin --- Common/Constants/include/CommonConstants/PhysicsConstants.h | 4 ++++ Common/Constants/include/CommonConstants/make_pdg_header.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/Common/Constants/include/CommonConstants/PhysicsConstants.h b/Common/Constants/include/CommonConstants/PhysicsConstants.h index f3b983b966faf..5f169f799eb5b 100644 --- a/Common/Constants/include/CommonConstants/PhysicsConstants.h +++ b/Common/Constants/include/CommonConstants/PhysicsConstants.h @@ -64,6 +64,8 @@ enum Pdg { kPhi = 333, kSigmaC0 = 4112, kSigmaCPlusPlus = 4222, + kSigmaCStar0 = 4114, + kSigmaCStarPlusPlus = 4224, kX3872 = 9920443, kXi0 = 3322, kXiB0 = 5232, @@ -115,6 +117,8 @@ constexpr double MassKPlusStar892 = 0.89167; constexpr double MassPhi = 1.019461; constexpr double MassSigmaC0 = 2.45375; constexpr double MassSigmaCPlusPlus = 2.45397; +constexpr double MassSigmaCStar0 = 2.51848; +constexpr double MassSigmaCStarPlusPlus = 2.51841; constexpr double MassX3872 = 3.87165; constexpr double MassXi0 = 1.31486; constexpr double MassXiB0 = 5.7919; diff --git a/Common/Constants/include/CommonConstants/make_pdg_header.py b/Common/Constants/include/CommonConstants/make_pdg_header.py index 89e3f0e35070d..4fdfd052ff613 100755 --- a/Common/Constants/include/CommonConstants/make_pdg_header.py +++ b/Common/Constants/include/CommonConstants/make_pdg_header.py @@ -119,6 +119,8 @@ class Pdg(Enum): kPhi = 333 kSigmaC0 = 4112 kSigmaCPlusPlus = 4222 + kSigmaCStar0 = 4114 + kSigmaCStarPlusPlus = 4224 kX3872 = 9920443 kXi0 = 3322 kXiB0 = 5232 From 37d0ba15113ee8666208a00b0e6ef7c070df8f3f Mon Sep 17 00:00:00 2001 From: Matthias Kleiner Date: Thu, 10 Apr 2025 08:06:34 +0200 Subject: [PATCH 0277/1764] TPC: change default setting for pad status map --- .../TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h index aff7c279cf5a8..667386e6481ca 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h @@ -506,7 +506,7 @@ DataProcessorSpec getTPCFactorizeIDCSpec(const int lane, const std::vector Date: Tue, 22 Apr 2025 09:28:39 +0200 Subject: [PATCH 0278/1764] GPU CMake: Set GPU architecture before checking the language, to suppress warnings about architecture detection failure --- dependencies/FindO2GPU.cmake | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index d50705d106bf3..0c5313c16af68 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -103,6 +103,9 @@ endif() # ---------------------------------- CUDA ---------------------------------- if(ENABLE_CUDA) + if(CUDA_COMPUTETARGET) + set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} CACHE STRING "" FORCE) + endif() set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) include(CheckLanguage) @@ -227,6 +230,9 @@ endif() # ---------------------------------- HIP ---------------------------------- if(ENABLE_HIP) + if(HIP_AMDGPUTARGET) + set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "" FORCE) + endif() if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/") list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake") endif() @@ -300,7 +306,7 @@ if(ENABLE_HIP) endif() string(REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}") # ROCm currently doesn’t support integrated graphics if(HIP_AMDGPUTARGET) - set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection + set(CMAKE_HIP_ARCHITECTURES 
"${HIP_AMDGPUTARGET}") endif() else() set(HIP_ENABLED OFF) From 33f93083e7524497014e31740f87a5a74c399ca1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 09:34:49 +0200 Subject: [PATCH 0279/1764] ONNXRuntime CMake: Use standard ONNXRuntime lowercase library syntax, remove obsolete wrapper, move detection from toplevel CMake to dependencies --- CMakeLists.txt | 2 -- Common/ML/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- GPU/GPUTracking/CMakeLists.txt | 2 +- dependencies/FindONNXRuntime.cmake | 23 ----------------------- dependencies/O2Dependencies.cmake | 3 +++ 7 files changed, 7 insertions(+), 29 deletions(-) delete mode 100644 dependencies/FindONNXRuntime.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index d28f191021fdf..b71d05175e9e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,8 +41,6 @@ o2_build_sanity_checks() set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED TRUE) -include(dependencies/FindONNXRuntime.cmake) - include(O2CheckCXXFeatures) o2_check_cxx_features() diff --git a/Common/ML/CMakeLists.txt b/Common/ML/CMakeLists.txt index 2db91fc4f4320..a5b336bf7e820 100644 --- a/Common/ML/CMakeLists.txt +++ b/Common/ML/CMakeLists.txt @@ -12,7 +12,7 @@ o2_add_library(ML SOURCES src/OrtInterface.cxx TARGETVARNAME targetName - PRIVATE_LINK_LIBRARIES O2::Framework ONNXRuntime::ONNXRuntime) + PRIVATE_LINK_LIBRARIES O2::Framework onnxruntime::onnxruntime) # Pass ORT variables as a preprocessor definition target_compile_definitions(${targetName} PRIVATE diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index f595fb051db54..8dd430d00a5c0 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -114,7 +114,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingCUDA - PRIVATE_LINK_LIBRARIES 
ONNXRuntime::ONNXRuntime + PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index d29a6afb60899..315a6c2fa3080 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -162,7 +162,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingHIP - PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime + PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 0cd302cc0be94..c0648b3274108 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -343,7 +343,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::DetectorsRaw O2::Steer O2::ML - PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime + PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PUBLIC_INCLUDE_DIRECTORIES ${INCDIRS} SOURCES ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H}) diff --git a/dependencies/FindONNXRuntime.cmake b/dependencies/FindONNXRuntime.cmake deleted file mode 100644 index b783c2e1c7bf3..0000000000000 --- a/dependencies/FindONNXRuntime.cmake +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. 
- -find_package(ONNXRuntime::ONNXRuntime CONFIG) -if (ONNXRuntime::ONNXRuntime_FOUND) - set(onnxruntime_FOUND 1) - add_library(onnxruntime::onnxruntime ALIAS ONNXRuntime::ONNXRuntime) -endif() - -if (NOT ONNXRuntime::ONNXRuntime_FOUND) - find_package(onnxruntime CONFIG) - if (onnxruntime_FOUND) - add_library(ONNXRuntime::ONNXRuntime ALIAS onnxruntime::onnxruntime) - endif() -endif() diff --git a/dependencies/O2Dependencies.cmake b/dependencies/O2Dependencies.cmake index c5c22b3a79097..9020f99b4877b 100644 --- a/dependencies/O2Dependencies.cmake +++ b/dependencies/O2Dependencies.cmake @@ -69,6 +69,9 @@ if (NOT TARGET Gandiva::gandiva_shared) add_library(Gandiva::gandiva_shared ALIAS gandiva_shared) endif() +find_package(onnxruntime CONFIG) +set_package_properties(onnxruntime PROPERTIES TYPE REQUIRED) + find_package(Vc) set_package_properties(Vc PROPERTIES TYPE REQUIRED) From 121ec682d4868106df67cf923699e82523afd23c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 10:22:49 +0200 Subject: [PATCH 0280/1764] GPU ONNX: Fix compiler warning and simplify code --- .../Base/cuda/GPUReconstructionCUDA.cu | 53 +++++++++---------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index c40c607396f3f..d5b01bfa34833 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -621,21 +621,10 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) } } -#ifndef __HIPCC__ // CUDA -void GPUReconstructionCUDA::startGPUProfiling() -{ - GPUChkErr(cudaProfilerStart()); -} - -void GPUReconstructionCUDA::endGPUProfiling() -{ - GPUChkErr(cudaProfilerStop()); -} - void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) { -#ifdef ORT_CUDA_BUILD - cudaGetDevice(deviceId); + GPUChkErr(cudaGetDevice(deviceId)); +#if 
!defined(__HIPCC__) && defined(ORT_CUDA_BUILD) OrtCUDAProviderOptionsV2* cuda_options = nullptr; CreateCUDAProviderOptions(&cuda_options); @@ -650,22 +639,7 @@ void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_option // Finally, don't forget to release the provider options ReleaseCUDAProviderOptions(cuda_options); -#endif // ORT_CUDA_BUILD -} - -#else // HIP -void* GPUReconstructionHIP::getGPUPointer(void* ptr) -{ - void* retVal = nullptr; - GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); - return retVal; -} - -void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) -{ -#ifdef ORT_ROCM_BUILD - // Create ROCm provider options - cudaGetDevice(deviceId); +#elif defined(ORT_ROCM_BUILD) // const auto& api = Ort::GetApi(); // api.GetCurrentGpuDeviceId(deviceId); OrtROCMProviderOptions rocm_options; @@ -676,4 +650,25 @@ void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options session_options.AppendExecutionProvider_ROCM(rocm_options); #endif // ORT_ROCM_BUILD } + +#ifndef __HIPCC__ // CUDA + +void GPUReconstructionCUDA::startGPUProfiling() +{ + GPUChkErr(cudaProfilerStart()); +} + +void GPUReconstructionCUDA::endGPUProfiling() +{ + GPUChkErr(cudaProfilerStop()); +} + +#else // HIP +void* GPUReconstructionHIP::getGPUPointer(void* ptr) +{ + void* retVal = nullptr; + GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); + return retVal; +} + #endif // __HIPCC__ From 72eed35cf94511efea88384d35dba3b9ad9e9bae Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Tue, 22 Apr 2025 14:23:33 +0200 Subject: [PATCH 0281/1764] Bug fix for incorrect setting --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 1106f96ed1cb2..2d8c2184e3b02 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ 
b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -272,7 +272,7 @@ AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regress AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).") AddOption(nnEvalMode, std::string, "c1:r1", "", 0, "Concatention of modes, e.g. c1:r1 (classification class 1, regression class 1)") // CCDB -AddOption(nnLoadFromCCDB, int, 1, "", 0, "If 1 networks are fetched from ccdb, else locally") +AddOption(nnLoadFromCCDB, int, 0, "", 0, "If 1 networks are fetched from ccdb, else locally") AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched") AddOption(nnCCDBURL, std::string, "http://ccdb-test.cern.ch:8080", "", 0, "The CCDB URL from where the network files are fetched") AddOption(nnCCDBPath, std::string, "Users/c/csonnabe/TPC/Clusterization", "", 0, "Folder path containing the networks") From 83c6a8a500f4f4a01e83475fbd608cbde674af76 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 11:06:27 +0200 Subject: [PATCH 0282/1764] ML: Fix compiler warnings --- Common/ML/include/ML/3rdparty/GPUORTFloat16.h | 10 +++++----- Common/ML/include/ML/OrtInterface.h | 1 + Common/ML/src/OrtInterface.cxx | 4 ++-- .../Global/GPUChainTrackingClusterizer.cxx | 4 ++-- .../TPCClusterFinder/GPUTPCNNClusterizerHost.cxx | 16 +++++++++------- .../GPUTPCNNClusterizerKernels.cxx | 6 +++--- 6 files changed, 22 insertions(+), 19 deletions(-) diff --git a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h index 9516ba5dad573..3bf2f465b2a35 100644 --- a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h +++ b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h @@ -535,9 +535,9 @@ GPUdi() uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept result = 
kPositiveQNaNBits; } else { auto get_msb_half = [](float fl) { - uint16_t result; + uint16_t res; #ifdef GPUCA_GPUCODE - o2::gpu::CAMath::memcpy(&result, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); + o2::gpu::CAMath::memcpy(&res, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); #else #ifdef __cpp_if_constexpr if constexpr (detail::endian::native == detail::endian::little) @@ -545,12 +545,12 @@ GPUdi() uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept if (detail::endian::native == detail::endian::little) #endif { - std::memcpy(&result, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); + std::memcpy(&res, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); } else { - std::memcpy(&result, &fl, sizeof(uint16_t)); + std::memcpy(&res, &fl, sizeof(uint16_t)); } #endif - return result; + return res; }; uint16_t upper_bits = get_msb_half(v); diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index e37b6a69b6036..791f6813c2d24 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -22,6 +22,7 @@ #include #include #include +#include // O2 includes #include "Framework/Logger.h" diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index 24a2fbffb252c..a8a20b11f9e64 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -19,6 +19,8 @@ // ONNX includes #include +#include + namespace o2 { @@ -139,7 +141,6 @@ void OrtModel::initSession() void OrtModel::memoryOnDevice(int32_t deviceIndex) { -#if (defined(ORT_ROCM_BUILD) || defined(ORT_MIGRAPHX_BUILD) || defined(ORT_CUDA_BUILD) || defined(ORT_TENSORRT_BUILD)) if (deviceIndex >= 0) { (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, 
https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -161,7 +162,6 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt; } } -#endif } void OrtModel::resetSession() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 981d565852d28..37c12b2a3b3f4 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -980,12 +980,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } - float time_clusterizer = 0, time_fill = 0, time_networks = 0; + // float time_clusterizer = 0, time_fill = 0, time_networks = 0; for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.nnClusterizerBatchedMode); batch++) { uint batchStart = batch * clustererNNShadow.nnClusterizerBatchedMode; size_t iSize = CAMath::Min((uint)clustererNNShadow.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); - auto start0 = std::chrono::high_resolution_clock::now(); + // auto start0 = std::chrono::high_resolution_clock::now(); runKernel({GetGrid(iSize * clustererNNShadow.nnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data // auto stop0 = std::chrono::high_resolution_clock::now(); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index db2f05711f537..31b71fd8f1ebe 100644 --- 
a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -29,7 +29,7 @@ using namespace o2::gpu; void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& settings) { std::string class_model_path = settings.nnClassificationPath, reg_model_path = settings.nnRegressionPath; - std::vector reg_model_paths; + std::vector reg_model_paths_local; std::vector evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':'); if (settings.nnLoadFromCCDB) { @@ -60,20 +60,20 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set model_class.initOptions(OrtOptions); modelsUsed[0] = true; - reg_model_paths = o2::utils::Str::tokenize(reg_model_path, ':'); + reg_model_paths_local = o2::utils::Str::tokenize(reg_model_path, ':'); if (!settings.nnClusterizerUseCfRegression) { - if (reg_model_paths.size() == 1) { - OrtOptions["model-path"] = reg_model_paths[0]; + if (reg_model_paths_local.size() == 1) { + OrtOptions["model-path"] = reg_model_paths_local[0]; OrtOptions["onnx-environment-name"] = "r1"; model_reg_1.initOptions(OrtOptions); modelsUsed[1] = true; } else { - OrtOptions["model-path"] = reg_model_paths[0]; + OrtOptions["model-path"] = reg_model_paths_local[0]; OrtOptions["onnx-environment-name"] = "r1"; model_reg_1.initOptions(OrtOptions); modelsUsed[1] = true; - OrtOptions["model-path"] = reg_model_paths[1]; + OrtOptions["model-path"] = reg_model_paths_local[1]; OrtOptions["onnx-environment-name"] = "r2"; model_reg_2.initOptions(OrtOptions); modelsUsed[2] = true; @@ -154,6 +154,7 @@ MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info MockedOrtAllocator::~MockedOrtAllocator() { // Ort::GetApi().ReleaseMemoryInfo(memory_info); + (void)0; // Suppress warning for empty destructor } void* MockedOrtAllocator::Alloc(size_t size) @@ -191,8 +192,9 @@ size_t MockedOrtAllocator::NumReserveAllocations() const void 
MockedOrtAllocator::LeakCheck() { - if (memory_inuse.load()) + if (memory_inuse.load()) { LOG(warning) << "memory leak!!!"; + } } void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 512bc1d3bb09b..413293502d3c6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -124,7 +124,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(peak.row()), pad = static_cast(peak.pad()); - if (clustererNN.nnClusterizerAddIndexData && transient_index == (clustererNN.nnClusterizerElementSize - 1)) { + if (clustererNN.nnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.nnClusterizerElementSize - 1)) { uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; @@ -141,7 +141,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(pad) / GPUTPCGeometry::NPads(row); } - } else if (transient_index < (clustererNN.nnClusterizerElementSize - 3)) { + } else if ((int32_t)transient_index < (clustererNN.nnClusterizerElementSize - 3)) { int time = static_cast(peak.time()); int r = CAMath::Floor(transient_index / ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1))) - clustererNN.nnClusterizerSizeInputRow; bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); @@ -197,7 +197,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(clustererNN.modelProbabilities_16[pIdx]); From 427e840295ead75ab7256b6c85fa1ed2cb4f0ec0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 11:06:40 +0200 Subject: [PATCH 0283/1764] GPU Standalone: support build with ONNXRuntime --- 
Common/ML/CMakeLists.txt | 2 +- Common/ML/include/ML/OrtInterface.h | 2 +- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 12 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 11 +- GPU/GPUTracking/CMakeLists.txt | 10 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 23 +- GPU/GPUTracking/Standalone/cmake/config.cmake | 1 + GPU/GPUTracking/Standalone/cmake/prepare.sh | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 2 - GPU/GPUTracking/kernels.cmake | 214 +++++++++--------- 10 files changed, 151 insertions(+), 128 deletions(-) diff --git a/Common/ML/CMakeLists.txt b/Common/ML/CMakeLists.txt index a5b336bf7e820..0ed52e1a23e20 100644 --- a/Common/ML/CMakeLists.txt +++ b/Common/ML/CMakeLists.txt @@ -12,7 +12,7 @@ o2_add_library(ML SOURCES src/OrtInterface.cxx TARGETVARNAME targetName - PRIVATE_LINK_LIBRARIES O2::Framework onnxruntime::onnxruntime) + PRIVATE_LINK_LIBRARIES O2::GPUCommon onnxruntime::onnxruntime) # Pass ORT variables as a preprocessor definition target_compile_definitions(${targetName} PRIVATE diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index 791f6813c2d24..ea70e28c0421c 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -25,7 +25,7 @@ #include // O2 includes -#include "Framework/Logger.h" +#include "GPUCommonLogger.h" namespace Ort { diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 8dd430d00a5c0..36162bcaa2f13 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -121,12 +121,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${CMAKE_SOURCE_DIR}/DataFormats/Reconstruction/src ${CMAKE_CURRENT_SOURCE_DIR} TARGETVARNAME targetName) - - target_compile_definitions(${targetName} PRIVATE - GPUCA_HAS_ONNX=1 - $<$:ORT_CUDA_BUILD> - $<$:ORT_TENSORRT_BUILD>) - install(FILES ${HDRS} DESTINATION include/GPU) endif() @@ -141,6 +135,12 @@ endif() target_compile_definitions(${targetName} PRIVATE $) +if 
(onnxruntime_FOUND) + target_compile_definitions(${targetName} PRIVATE + $<$:ORT_CUDA_BUILD> + $<$:ORT_TENSORRT_BUILD>) +endif() + # Setting target architecture and adding GPU libraries target_link_libraries(${targetName} PRIVATE cuda cudart nvrtc) set_target_cuda_arch(${targetName}) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 315a6c2fa3080..9398ffdd5b9f1 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -170,11 +170,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${GPUCA_HIP_SOURCE_DIR} TARGETVARNAME targetName) - target_compile_definitions(${targetName} PRIVATE - GPUCA_HAS_ONNX=1 - $<$:ORT_ROCM_BUILD> - $<$:ORT_MIGRAPHX_BUILD>) - install(FILES ${HDRS} DESTINATION include/GPU) # o2_add_test(GPUsortHIP NAME test_GPUsortHIP @@ -195,6 +190,12 @@ endif() target_compile_definitions(${targetName} PRIVATE $) +if (onnxruntime_FOUND) + target_compile_definitions(${targetName} PRIVATE + $<$:ORT_ROCM_BUILD> + $<$:ORT_MIGRAPHX_BUILD>) +endif() + add_library(${MODULE}_CXX OBJECT ${SRCS_CXX}) # Adding a C++ library for the .cxx code of the HIP library, such that it does not link to HIP libraries, and CMake HIP Language doesn't add HIP compile flags. 
target_compile_definitions(${MODULE}_CXX PRIVATE $) target_include_directories(${MODULE}_CXX PRIVATE $) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index c0648b3274108..4c1de17025627 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -200,7 +200,7 @@ set(SRCS_NO_CINT ${SRCS_NO_CINT} Refit/GPUTrackingRefitKernel.cxx Merger/GPUTPCGMO2Output.cxx) -if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") +if(onnxruntime_FOUND) list(APPEND SRCS_NO_CINT TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx TPCClusterFinder/GPUTPCNNClusterizer.cxx TPCClusterFinder/GPUTPCNNClusterizerHost.cxx) endif() @@ -343,7 +343,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::DetectorsRaw O2::Steer O2::ML - PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PUBLIC_INCLUDE_DIRECTORIES ${INCDIRS} SOURCES ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H}) @@ -351,7 +350,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${targetName} PRIVATE $) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX=1) + target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) o2_target_root_dictionary(${MODULE} HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL} @@ -421,6 +420,11 @@ target_link_libraries(${targetName} PRIVATE TBB::tbb) target_compile_options(${targetName} PRIVATE -Wno-instantiation-after-specialization) +if (onnxruntime_FOUND) + target_compile_definitions(${targetName} PRIVATE GPUCA_HAS_ONNX=1) + target_link_libraries(${targetName} PRIVATE onnxruntime::onnxruntime) +endif() + # Add CMake recipes for GPU Tracking librararies if(CUDA_ENABLED OR OPENCL_ENABLED OR HIP_ENABLED) if(CMAKE_SYSTEM_NAME MATCHES Darwin) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index fbc256d5d7f91..a17c58ad1ba03 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -121,11 +121,25 @@ else() endif() # Detect GPU 
Backends -find_package(O2GPU) +find_package(O2GPU REQUIRED) + +if(GPUCA_CONFIG_ONNX) + find_package(onnxruntime REQUIRED) + if(CUDA_ENABLED AND NOT DEFINED ORT_CUDA_BUILD) + set(ORT_CUDA_BUILD ON) + elseif(HIP_ENABLED AND NOT DEFINED ORT_ROCM_BUILD) + set(ORT_ROCM_BUILD ON) + endif() +else() + set(onnxruntime_FOUND OFF) +endif() # Create main targets add_subdirectory(../../ GPU) -add_library(standalone_support SHARED ${O2_DIR}/Common/Field/src/MagFieldFast.cxx +add_library(standalone_support SHARED + ${O2_DIR}/Common/Field/src/MagFieldFast.cxx + ${O2_DIR}/Common/ML/src/OrtInterface.cxx + ${O2_DIR}/Common/Utils/src/StringUtils.cxx ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx @@ -150,6 +164,7 @@ target_include_directories(standalone_support PUBLIC ${O2_DIR}/Common/Constants/include ${O2_DIR}/Common/MathUtils/include ${O2_DIR}/Common/Utils/include + ${O2_DIR}/Common/ML/include ${O2_DIR}/DataFormats/common/include ${O2_DIR}/DataFormats/Detectors/Common/include ${O2_DIR}/DataFormats/Detectors/ITSMFT/common/include @@ -210,6 +225,10 @@ if(GPUCA_CONFIG_ROOT) ROOT::Tree) endif() +if(GPUCA_CONFIG_ONNX) + target_link_libraries(standalone_support PRIVATE onnxruntime::onnxruntime) +endif() + if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" DIRECTORY) diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index af7c96bb96fbb..1de0cfa27d7ee 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -18,6 +18,7 @@ set(ENABLE_OPENCL AUTO) set(GPUCA_CONFIG_VC 1) 
set(GPUCA_CONFIG_FMT 1) set(GPUCA_CONFIG_ROOT 1) +set(GPUCA_CONFIG_ONNX 0) set(GPUCA_BUILD_EVENT_DISPLAY 1) set(GPUCA_BUILD_EVENT_DISPLAY_FREETYPE 1) set(GPUCA_BUILD_EVENT_DISPLAY_VULKAN 1) diff --git a/GPU/GPUTracking/Standalone/cmake/prepare.sh b/GPU/GPUTracking/Standalone/cmake/prepare.sh index 17474b5fc6956..121245e23dc65 100755 --- a/GPU/GPUTracking/Standalone/cmake/prepare.sh +++ b/GPU/GPUTracking/Standalone/cmake/prepare.sh @@ -11,6 +11,6 @@ else fi eval "`alienv shell-helper`" # alienv load O2/latest -for i in Vc boost fmt CMake ms_gsl Clang ninja TBB ROOT; do +for i in Vc boost fmt CMake ms_gsl Clang ninja TBB ROOT ONNXRuntime; do source sw/$ALIARCH/$i/latest/etc/profile.d/init.sh done diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 35f2915d9486a..e36cb4e2f3149 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -174,7 +174,6 @@ function(o2_gpu_kernel_add_parameter) list(LENGTH ARGV n) math(EXPR n "${n} - 1") foreach(i RANGE 0 ${n}) - message(STATUS "Adding ${ARGV${i}}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_PARAMS "${ARGV${i}}") endforeach() endfunction() @@ -182,7 +181,6 @@ function(o2_gpu_kernel_add_string_parameter) list(LENGTH ARGV n) math(EXPR n "${n} - 1") foreach(i RANGE 0 ${n}) - message(STATUS "Adding ${ARGV${i}}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_STRING_PARAMS "${ARGV${i}}") endforeach() endfunction() diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 202ea47d1f3bf..937a92fef33df 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -24,117 +24,117 @@ o2_gpu_kernel_file_list(O2PROPAGATOR TrackParametrization.cxx TrackParametrizati o2_gpu_kernel_file_list(TPCCOMPRESSION GPUTPCCompressionTrackModel.cxx) o2_gpu_kernel_file_list(TPCDECOMPRESSION GPUTPCCompressionTrackModel.cxx ERRORS) 
o2_gpu_kernel_file_list(TPCCLUSTERFINDER ERRORS ClusterAccumulator.cxx) -if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") -o2_gpu_kernel_file_list(TPCNNCLUSTERFINDER ERRORS ClusterAccumulator.cxx GPUTPCNNClusterizerKernels.cxx) -endif() o2_gpu_kernel_file_list(TRDTRACKER GPUTRDTrack.cxx GPUTRDTracker.cxx GPUTRDTrackletWord.cxx GeometryBase.cxx) o2_gpu_kernel_file_list(GLOBALREFIT TPCMERGER O2PROPAGATOR MATLUT GPUTrackingRefit.cxx) +if(onnxruntime_FOUND) +o2_gpu_kernel_file_list(TPCNNCLUSTERFINDER ERRORS ClusterAccumulator.cxx GPUTPCNNClusterizerKernels.cxx) +endif() -o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCTrackletConstructor" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO int32_t* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO int32_t n) -o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB) -o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER") -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER") -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER") -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" 
"= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) -o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode) -o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t id) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t useOrigTrackParam int8_t mergeAll) -o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t output) -o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t border0 int32_t border1 int8_t useOrigTrackParam) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" 
NO int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) -o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER") -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER") -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER") -o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER") -o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB 
GPUTRDTrackerGPU* externalInstance) -o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER MATLUT O2PROPAGATOR" LB GPUTRDTracker* externalInstance) -o2_gpu_add_kernel("GPUITSFitterKernels" "= TPCMERGER MATLUT" LB) -o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB int32_t trackStart int32_t trackEnd) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB int32_t sectorStart int32_t nSectors) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB int8_t setPositions) -o2_gpu_add_kernel("GPUTPCCFPeakFinder" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "= TPCCLUSTERFINDER" LB) 
-o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB int8_t onlyMC) -if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNNSingleElement" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass1Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletConstructor" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO void* ptr uint64_t size) +o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO int32_t* ptr uint64_t size) +o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking 
TPCTRACKER" NO int32_t n) +o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB) +o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB) +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map uint32_t* output) +o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode) +o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t id) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" 
"GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t useOrigTrackParam int8_t mergeAll) +o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t output) +o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t border0 int32_t border1 int8_t useOrigTrackParam) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) +o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU 
TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER") +o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB GPUTRDTrackerGPU* externalInstance) +o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER MATLUT O2PROPAGATOR" LB GPUTRDTracker* externalInstance) +o2_gpu_add_kernel("GPUITSFitterKernels" "= TPCMERGER MATLUT" LB) +o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB int32_t trackStart int32_t trackEnd) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB int32_t sectorStart int32_t nSectors) 
+o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB int8_t setPositions) +o2_gpu_add_kernel("GPUTPCCFPeakFinder" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB int8_t onlyMC) +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO GPUTPCLinearLabels* out) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPCCLUSTERFINDER" LB int32_t iBuf uint32_t offset int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage CfChargePos* in CfChargePos* out) +o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) 
+o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) +if(onnxruntime_FOUND) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNNSingleElement" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass1Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) endif() -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO GPUTPCLinearLabels* out) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, 
scanDown" "= TPCCLUSTERFINDER" LB int32_t iBuf "uint32_t" offset int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage CfChargePos* in CfChargePos* out) -o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP NEIGHBOURS_FINDER_UNROLL_GLOBAL From e9b2d160946a3e929f6309c4af97171dd9cf0617 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 18:34:36 +0200 Subject: [PATCH 0284/1764] CUDA ORT: Must use api struct to call functions --- .../Base/cuda/GPUReconstructionCUDA.cu | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d5b01bfa34833..d4f9faaf203c9 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -621,24 +621,34 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) } } +#define ORTCHK(command) \ + { \ + OrtStatus* status = command; \ + if (status != nullptr) { \ + const char* msg = api->GetErrorMessage(status); \ + GPUFatal("ONNXRuntime Error: %s", msg); \ + } \ + } + void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) { GPUChkErr(cudaGetDevice(deviceId)); #if !defined(__HIPCC__) && defined(ORT_CUDA_BUILD) + const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION); OrtCUDAProviderOptionsV2* cuda_options = 
nullptr; - CreateCUDAProviderOptions(&cuda_options); + ORTCHK(api->CreateCUDAProviderOptions(&cuda_options)); // std::vector keys{"device_id", "gpu_mem_limit", "arena_extend_strategy", "cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace", "cudnn_conv1d_pad_to_nc1d"}; // std::vector values{"0", "2147483648", "kSameAsRequested", "DEFAULT", "1", "1", "1"}; // UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size()); // this implicitly sets "has_user_compute_stream" - cuda_options.has_user_compute_stream = 1; - UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream]); + cuda_options->has_user_compute_stream = 1; + ORTCHK(api->UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream])); session_options.AppendExecutionProvider_CUDA_V2(cuda_options); // Finally, don't forget to release the provider options - ReleaseCUDAProviderOptions(cuda_options); + api->ReleaseCUDAProviderOptions(cuda_options); #elif defined(ORT_ROCM_BUILD) // const auto& api = Ort::GetApi(); // api.GetCurrentGpuDeviceId(deviceId); From 17132044b680bcbfe5d979fcc0f107b3d5ccdc2e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 22:35:20 +0200 Subject: [PATCH 0285/1764] GPU CUDA ORT: Fix usage of OrtCUDAProviderOptionsV2 --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 3 +-- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 36162bcaa2f13..dbdf6b606df18 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -114,7 +114,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingCUDA - PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime 
PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -139,6 +138,7 @@ if (onnxruntime_FOUND) target_compile_definitions(${targetName} PRIVATE $<$:ORT_CUDA_BUILD> $<$:ORT_TENSORRT_BUILD>) + target_link_libraries(${targetName} PRIVATE onnxruntime::onnxruntime) endif() # Setting target architecture and adding GPU libraries diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d4f9faaf203c9..c8e5420a8bcf3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -643,9 +643,8 @@ void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_option // UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size()); // this implicitly sets "has_user_compute_stream" - cuda_options->has_user_compute_stream = 1; ORTCHK(api->UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream])); - session_options.AppendExecutionProvider_CUDA_V2(cuda_options); + ORTCHK(api->SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options)); // Finally, don't forget to release the provider options api->ReleaseCUDAProviderOptions(cuda_options); diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 9398ffdd5b9f1..4689fee02d31e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -162,7 +162,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingHIP - PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -194,6 +193,7 @@ if (onnxruntime_FOUND) target_compile_definitions(${targetName} PRIVATE $<$:ORT_ROCM_BUILD> $<$:ORT_MIGRAPHX_BUILD>) + 
target_link_libraries(${targetName} PRIVATE onnxruntime::onnxruntime) endif() add_library(${MODULE}_CXX OBJECT ${SRCS_CXX}) # Adding a C++ library for the .cxx code of the HIP library, such that it does not link to HIP libraries, and CMake HIP Language doesn't add HIP compile flags. From 6a0656424b366aa04fd338ac3f27f26b40df948d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 21 Apr 2025 21:23:11 +0200 Subject: [PATCH 0286/1764] GPU: Use aligned new/delete for some host allocations --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 27 ++++++++++++++-------- GPU/GPUTracking/Base/GPUReconstruction.h | 10 +++++--- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index ab2210e5dd555..7a8d73e689b84 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -716,8 +716,13 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type) throw std::runtime_error("Requested invalid memory typo for unmanaged allocation"); } if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]); - return GPUProcessor::alignPointer(mUnmanagedChunks.back().get()); + char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size]; + if ((type & GPUMemoryResource::MEMORY_STACK)) { + mNonPersistentIndividualDirectAllocations.emplace_back(retVal, alignedDeleter()); + } else { + mDirectMemoryChunks.emplace_back(retVal, alignedDeleter()); + } + return retVal; } else { if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) { GPUError("Must not allocate direct memory while volatile chunks are allocated"); @@ -765,8 +770,9 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device) if (device) { return 
AllocateVolatileDeviceMemory(size); } - mVolatileChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]); - return GPUProcessor::alignPointer(mVolatileChunks.back().get()); + char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size]; + mVolatileChunks.emplace_back(retVal, alignedDeleter()); + return retVal; } void GPUReconstruction::MakeFutureDeviceMemoryAllocationsVolatile() @@ -851,7 +857,7 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res) void GPUReconstruction::PushNonPersistentMemory(uint64_t tag) { - mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), tag); + mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), mNonPersistentIndividualDirectAllocations.size(), tag); } void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) @@ -862,11 +868,11 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) if (mNonPersistentMemoryStack.size() == 0) { GPUFatal("Trying to pop memory state from empty stack"); } - if (tag != 0 && std::get<3>(mNonPersistentMemoryStack.back()) != tag) { - GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str()); + if (tag != 0 && std::get<4>(mNonPersistentMemoryStack.back()) != tag) { + GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str()); } if ((GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) { - printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], 
qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); + printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); PrintMemoryOverview(); printf("%76s", ""); PrintMemoryMax(); @@ -882,6 +888,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) res->mPtrDevice = nullptr; } mNonPersistentIndividualAllocations.resize(std::get<2>(mNonPersistentMemoryStack.back())); + mNonPersistentIndividualDirectAllocations.resize(std::get<3>(mNonPersistentMemoryStack.back())); mNonPersistentMemoryStack.pop_back(); } @@ -917,9 +924,11 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs) FreeRegisteredMemory(i); } } - mUnmanagedChunks.clear(); mNonPersistentMemoryStack.clear(); mNonPersistentIndividualAllocations.clear(); + mDirectMemoryChunks.clear(); + mNonPersistentIndividualDirectAllocations.clear(); + mVolatileChunks.clear(); mVolatileMemoryStart = nullptr; if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 396a007761fb7..f5b39cb370b9e 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -69,8 +69,6 @@ class GPUReconstruction class LibraryLoader; // These must be the first members to ensure correct destructor order! 
std::shared_ptr mMyLib = nullptr; std::vector mMemoryResources; - std::vector> mUnmanagedChunks; - std::vector> mVolatileChunks; std::vector> mChains; public: @@ -373,9 +371,15 @@ class GPUReconstruction GPUProcessor* proc = nullptr; std::vector res; }; + struct alignedDeleter { + void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; + }; std::unordered_map mMemoryReuse1to1; - std::vector> mNonPersistentMemoryStack; + std::vector> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCound, tag std::vector mNonPersistentIndividualAllocations; + std::vector> mNonPersistentIndividualDirectAllocations; + std::vector> mDirectMemoryChunks; + std::vector> mVolatileChunks; std::unique_ptr mPipelineContext; From fef77bcb601992f654c44035c593b1c60c41bbfa Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 21 Apr 2025 21:40:18 +0200 Subject: [PATCH 0287/1764] GPU: Direct memory allocation supports stacked memory --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 45 +++++++++++++--------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 7a8d73e689b84..ad2ee2e840d00 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -712,9 +712,6 @@ size_t GPUReconstruction::AllocateRegisteredMemory(int16_t ires, GPUOutputContro void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type) { - if (type != GPUMemoryResource::MEMORY_HOST && (!IsGPU() || type != GPUMemoryResource::MEMORY_GPU)) { - throw std::runtime_error("Requested invalid memory typo for unmanaged allocation"); - } if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size]; if ((type & GPUMemoryResource::MEMORY_STACK)) { @@ -723,25 
+720,35 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type) mDirectMemoryChunks.emplace_back(retVal, alignedDeleter()); } return retVal; + } + + if ((type & ~(GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK)) || ((type & GPUMemoryResource::MEMORY_HOST) && (type & GPUMemoryResource::MEMORY_GPU))) { + throw std::runtime_error("Requested invalid memory typo for direct allocation"); + } + if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) { + GPUError("Must not allocate direct memory while volatile chunks are allocated"); + throw std::bad_alloc(); + } + + void*& pool = (type & GPUMemoryResource::MEMORY_GPU) ? mDeviceMemoryPool : mHostMemoryPool; + void*& poolend = (type & GPUMemoryResource::MEMORY_GPU) ? mDeviceMemoryPoolEnd : mHostMemoryPoolEnd; + char* retVal; + if ((type & GPUMemoryResource::MEMORY_STACK)) { + poolend = (char*)poolend - size; + poolend = (char*)poolend - GPUProcessor::getAlignmentMod(poolend); + retVal = (char*)poolend; } else { - if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) { - GPUError("Must not allocate direct memory while volatile chunks are allocated"); - throw std::bad_alloc(); - } - void*& pool = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPool : mHostMemoryPool; - void*& poolend = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPoolEnd : mHostMemoryPoolEnd; - char* retVal; GPUProcessor::computePointerWithAlignment(pool, retVal, size); - if (pool > poolend) { - GPUError("Insufficient unmanaged memory: missing %ld bytes", ptrDiff(pool, poolend)); - throw std::bad_alloc(); - } - UpdateMaxMemoryUsed(); - if (GetProcessingSettings().allocDebugLevel >= 2) { - std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? 
"gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n"; - } - return retVal; } + if (pool > poolend) { + GPUError("Insufficient unmanaged memory: missing %ld bytes", ptrDiff(pool, poolend)); + throw std::bad_alloc(); + } + UpdateMaxMemoryUsed(); + if (GetProcessingSettings().allocDebugLevel >= 2) { + std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? "gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n"; + } + return retVal; } void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size) From 1f6767ce039ea2ec6cc72da136368f1dad9677e3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 18:07:01 +0200 Subject: [PATCH 0288/1764] GPU CMake: Another attempt to silence the CMake warnings about architectures --- GPU/GPUTracking/Standalone/CMakeLists.txt | 6 +++--- dependencies/FindO2GPU.cmake | 17 +++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index a17c58ad1ba03..0859223187f00 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -125,10 +125,10 @@ find_package(O2GPU REQUIRED) if(GPUCA_CONFIG_ONNX) find_package(onnxruntime REQUIRED) - if(CUDA_ENABLED AND NOT DEFINED ORT_CUDA_BUILD) - set(ORT_CUDA_BUILD ON) - elseif(HIP_ENABLED AND NOT DEFINED ORT_ROCM_BUILD) + if(HIP_ENABLED AND NOT DEFINED ORT_ROCM_BUILD) set(ORT_ROCM_BUILD ON) + elseif(CUDA_ENABLED AND NOT DEFINED ORT_CUDA_BUILD) + set(ORT_CUDA_BUILD ON) endif() else() set(onnxruntime_FOUND OFF) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 0c5313c16af68..33925e8cf1341 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -104,7 +104,9 @@ endif() # ---------------------------------- CUDA ---------------------------------- if(ENABLE_CUDA) if(CUDA_COMPUTETARGET) - 
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} CACHE STRING "" FORCE) + set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET}) + else() + set(CMAKE_CUDA_ARCHITECTURES 61-virtual) endif() set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) @@ -121,11 +123,6 @@ if(ENABLE_CUDA) message(STATUS "Using as CUDA GCC version: ${GPUCA_CUDA_GCCBIN}") set(CMAKE_CUDA_HOST_COMPILER "${GPUCA_CUDA_GCCBIN}") endif() - if(CUDA_COMPUTETARGET) - set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} CACHE STRING "" FORCE) - else() - set(CMAKE_CUDA_ARCHITECTURES 61-virtual CACHE STRING "" FORCE) - endif() enable_language(CUDA) get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) if (ENABLE_CUDA STREQUAL "AUTO") @@ -231,7 +228,8 @@ endif() # ---------------------------------- HIP ---------------------------------- if(ENABLE_HIP) if(HIP_AMDGPUTARGET) - set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "" FORCE) + set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") + set(AMDGPU_TARGETS "${HIP_AMDGPUTARGET}") endif() if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/") list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake") @@ -239,11 +237,6 @@ if(ENABLE_HIP) if("$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" OR CMAKE_PREFIX_PATH MATCHES "rocm") set(CMAKE_HIP_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_HIP_STANDARD_REQUIRED TRUE) - if(HIP_AMDGPUTARGET) - set(AMDGPU_TARGETS "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) - set(GPU_TARGETS "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) - set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) - endif() set(TMP_ROCM_DIR_LIST "${CMAKE_PREFIX_PATH}:$ENV{CMAKE_PREFIX_PATH}") string(REPLACE ":" ";" TMP_ROCM_DIR_LIST "${TMP_ROCM_DIR_LIST}") list(FILTER TMP_ROCM_DIR_LIST INCLUDE REGEX rocm) From cde32e7eee5ac9ff2668621115d0f839b95fb11c Mon Sep 
17 00:00:00 2001 From: Felix Schlepper Date: Thu, 17 Apr 2025 20:22:59 +0200 Subject: [PATCH 0289/1764] Common: ConfKey align prov + print hash Signed-off-by: Felix Schlepper --- .../include/CommonUtils/ConfigurableParam.h | 2 +- .../CommonUtils/ConfigurableParamHelper.h | 14 +++--- Common/Utils/src/ConfigurableParamHelper.cxx | 49 ++++++++++++++----- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/Common/Utils/include/CommonUtils/ConfigurableParam.h b/Common/Utils/include/CommonUtils/ConfigurableParam.h index f44d9efcaea76..39b24bbbbd57c 100644 --- a/Common/Utils/include/CommonUtils/ConfigurableParam.h +++ b/Common/Utils/include/CommonUtils/ConfigurableParam.h @@ -162,7 +162,7 @@ class ConfigurableParam virtual std::string getName() const = 0; // print the current keys and values to screen (optionally with provenance information) - virtual void printKeyValues(bool showprov = true, bool useLogger = false) const = 0; + virtual void printKeyValues(bool showprov = true, bool useLogger = false, bool withPadding = false, bool showHash = false) const = 0; // get a single size_t hash_value of this parameter (can be used as a checksum to see // if object changed or different) diff --git a/Common/Utils/include/CommonUtils/ConfigurableParamHelper.h b/Common/Utils/include/CommonUtils/ConfigurableParamHelper.h index 7d9cb78bb9968..6e69fae03e6c3 100644 --- a/Common/Utils/include/CommonUtils/ConfigurableParamHelper.h +++ b/Common/Utils/include/CommonUtils/ConfigurableParamHelper.h @@ -34,7 +34,7 @@ struct ParamDataMember { std::string value; std::string provenance; - std::string toString(std::string const& prefix, bool showProv) const; + std::string toString(std::string const& prefix, bool showProv, size_t padding = 0) const; }; // ---------------------------------------------------------------- @@ -58,8 +58,8 @@ class _ParamHelper static void syncCCDBandRegistry(std::string const& mainkey, TClass* cl, void* to, void* from, std::map* provmap, size_t offset); - 
static void outputMembersImpl(std::ostream& out, std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger); - static void printMembersImpl(std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger); + static void outputMembersImpl(std::ostream& out, std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger, bool withPadding = false, bool showHash = false); + static void printMembersImpl(std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger, bool withPadding, bool showHash); static size_t getHashImpl(std::string const& mainkey, std::vector const* members); @@ -100,13 +100,13 @@ class ConfigurableParamHelper : virtual public ConfigurableParam // ---------------------------------------------------------------- // one of the key methods, using introspection to print itself - void printKeyValues(bool showProv = true, bool useLogger = false) const final + void printKeyValues(bool showProv = true, bool useLogger = false, bool withPadding = true, bool showHash = true) const final { if (!isInitialized()) { initialize(); } auto members = getDataMembers(); - _ParamHelper::printMembersImpl(getName(), members, showProv, useLogger); + _ParamHelper::printMembersImpl(getName(), members, showProv, useLogger, withPadding, showHash); } // @@ -237,13 +237,13 @@ class ConfigurableParamPromoter : public Base, virtual public ConfigurableParam // ---------------------------------------------------------------- // one of the key methods, using introspection to print itself - void printKeyValues(bool showProv = true, bool useLogger = false) const final + void printKeyValues(bool showProv = true, bool useLogger = false, bool withPadding = true, bool showHash = true) const final { if (!isInitialized()) { initialize(); } auto members = getDataMembers(); - _ParamHelper::printMembersImpl(getName(), members, showProv, useLogger); + _ParamHelper::printMembersImpl(getName(), members, 
showProv, useLogger, withPadding, showHash); } // diff --git a/Common/Utils/src/ConfigurableParamHelper.cxx b/Common/Utils/src/ConfigurableParamHelper.cxx index f217d402bcb45..161735b3a5ce4 100644 --- a/Common/Utils/src/ConfigurableParamHelper.cxx +++ b/Common/Utils/src/ConfigurableParamHelper.cxx @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef NDEBUG #undef NDEBUG #endif @@ -35,16 +36,25 @@ using namespace o2::conf; // ---------------------------------------------------------------------- -std::string ParamDataMember::toString(std::string const& prefix, bool showProv) const +std::string ParamDataMember::toString(std::string const& prefix, bool showProv, size_t padding) const { - std::string nil = ""; - + const std::string label = prefix + "." + name + " : " + value; std::ostringstream out; - out << prefix << "." << name << " : " << value; + out << label; if (showProv) { - std::string prov = (provenance.compare("") == 0 ? nil : provenance); - out << "\t\t[ " + prov + " ]"; + std::string prov = (provenance.compare("") == 0 ? 
"" : provenance); + if (padding) { + size_t len = label.size() - prefix.size() - 5; // 4 four the extra chars + 1 for the maxpad + if (len < padding) { + out << std::string(padding - len, ' '); + } else { + out << ' '; + } + out << "[ " + prov + " ]"; + } else { + out << "\t\t[ " + prov + " ]"; + } } return out.str(); } @@ -308,23 +318,40 @@ void _ParamHelper::fillKeyValuesImpl(std::string const& mainkey, TClass* cl, voi // ---------------------------------------------------------------------- -void _ParamHelper::printMembersImpl(std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger) +void _ParamHelper::printMembersImpl(std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger, bool withPadding, bool showHash) { - _ParamHelper::outputMembersImpl(std::cout, mainkey, members, showProv, useLogger); + _ParamHelper::outputMembersImpl(std::cout, mainkey, members, showProv, useLogger, withPadding, showHash); } -void _ParamHelper::outputMembersImpl(std::ostream& out, std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger) +void _ParamHelper::outputMembersImpl(std::ostream& out, std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger, bool withPadding, bool showHash) { if (members == nullptr) { return; } + size_t maxpad{0}; + if (withPadding) { + for (auto& member : *members) { + maxpad = std::max(maxpad, member.name.size() + member.value.size()); + } + } + + if (showHash) { + std::string shash = std::format("{:07x}", getHashImpl(mainkey, members)); + shash = shash.substr(0, 7); + if (useLogger) { + LOG(info) << mainkey << " [Hash#" << shash << "]"; + } else { + out << mainkey << " [Hash#" << shash << "]\n"; + } + } + for (auto& member : *members) { if (useLogger) { - LOG(info) << member.toString(mainkey, showProv); + LOG(info) << member.toString(mainkey, showProv, maxpad); } else { - out << member.toString(mainkey, showProv) << "\n"; + out << 
member.toString(mainkey, showProv, maxpad) << "\n"; } } } From aae7f0a9d4201dd1efff98d47d3ea51ebd7e9c19 Mon Sep 17 00:00:00 2001 From: Francesco Mazzaschi <43742195+fmazzasc@users.noreply.github.com> Date: Wed, 23 Apr 2025 15:19:36 +0200 Subject: [PATCH 0290/1764] [StrangenessTracker] Fix chi2 calculation and attachment structure (#14186) * [StrangenessTracker] Fix chi2 calculation and attachment structure * Please consider the following formatting changes --------- Co-authored-by: Francesco Mazzaschi Co-authored-by: ALICE Action Bot --- .../StrangenessTracking/StrangenessTracker.h | 7 ++----- .../StrangenessTracking/src/StrangenessTracker.cxx | 14 ++++++-------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h b/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h index 11feac64c59ae..b30be4dd081e3 100644 --- a/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h +++ b/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h @@ -56,7 +56,7 @@ enum DauType : int { struct ClusAttachments { - std::array arr; + std::array arr; }; class StrangenessTracker @@ -357,10 +357,7 @@ class StrangenessTracker std::vector mFitter4Body; // optional DCA Fitter for final 4 Body refit (per thread) o2::base::PropagatorImpl::MatCorrType mCorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrNONE; // use mat correction - - std::vector> mDaughterTracks; // vector of daughter tracks (per thread) - ClusAttachments mStructClus; // # of attached tracks, 1 for mother, 2 for daughter - + std::vector> mDaughterTracks; // vector of daughter tracks (per thread) ClassDefNV(StrangenessTracker, 1); }; diff --git a/Detectors/Vertexing/StrangenessTracking/src/StrangenessTracker.cxx b/Detectors/Vertexing/StrangenessTracking/src/StrangenessTracker.cxx index c8a62fa76e3ed..acbaa9d6a08fd 100644 
--- a/Detectors/Vertexing/StrangenessTracking/src/StrangenessTracker.cxx +++ b/Detectors/Vertexing/StrangenessTracking/src/StrangenessTracker.cxx @@ -245,7 +245,7 @@ void StrangenessTracker::processCascade(int iCasc, const Cascade& casc, const Ca strangeTrack.mDecayRef = iCasc; strangeTrack.mITSRef = mSortedITSindexes[iTrack]; mStrangeTrackVec[iThread].push_back(strangeTrack); - mClusAttachments[iThread].push_back(mStructClus); + mClusAttachments[iThread].push_back(structClus); if (mMCTruthON) { auto lab = getStrangeTrackLabel(itsTrack, strangeTrack, structClus); mStrangeTrackLabels[iThread].push_back(lab); @@ -350,7 +350,7 @@ bool StrangenessTracker::matchDecayToITStrack(float decayR, StrangeTrack& strang auto nMinClusMother = trackClusters.size() < 4 ? 2 : mStrParams->mMinMotherClus; std::vector motherClusters; - std::array nAttachments; + std::array nAttachments; nAttachments.fill(-1); // fill arr with -1 int nUpdates = 0; @@ -412,13 +412,13 @@ bool StrangenessTracker::matchDecayToITStrack(float decayR, StrangeTrack& strang std::reverse(motherClusters.begin(), motherClusters.end()); - mGlobalChi2 = -1; + mGlobalChi2 = 0; for (auto& clus : motherClusters) { if (!updateTrack(clus, motherTrackClone)) { break; } } - strangeTrack.mMatchChi2 = mGlobalChi2; + strangeTrack.mMatchChi2 = mGlobalChi2 / motherClusters.size(); LOG(debug) << "Inward-outward refit finished, starting final topology refit"; // final Topology refit @@ -481,7 +481,6 @@ bool StrangenessTracker::matchDecayToITStrack(float decayR, StrangeTrack& strang strangeTrack.mTopoChi2 = mFitter3Body[iThread].getChi2AtPCACandidate(); } structClus.arr = nAttachments; - return true; } @@ -508,9 +507,8 @@ bool StrangenessTracker::updateTrack(const ITSCluster& clus, o2::track::TrackPar return false; } } - auto chi2 = std::abs(track.getPredictedChi2Quiet(clus)); // abs to be understood - LOG(debug) << "Chi2: " << chi2; - if (chi2 > mStrParams->mMaxChi2 || chi2 < 0) { + auto chi2 = track.getPredictedChi2Quiet(clus); 
// abs to be understood + if (std::abs(chi2) > mStrParams->mMaxChi2) { return false; } From 04baff0b28a95c388a139f2554a77eeab97588dc Mon Sep 17 00:00:00 2001 From: abmodak <67369858+abmodak@users.noreply.github.com> Date: Wed, 23 Apr 2025 18:50:42 +0200 Subject: [PATCH 0291/1764] Add PMD info to AO2Ds (#13998) --- .../include/Framework/AnalysisDataModel.h | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Framework/Core/include/Framework/AnalysisDataModel.h b/Framework/Core/include/Framework/AnalysisDataModel.h index d90952f38ac9f..3216a24ed73a2 100644 --- a/Framework/Core/include/Framework/AnalysisDataModel.h +++ b/Framework/Core/include/Framework/AnalysisDataModel.h @@ -1778,6 +1778,23 @@ DECLARE_SOA_COLUMN(DCAr, dcaR, float); //! DCA DECLARE_SOA_COLUMN(DCAz, dcaZ, float); //! DCA in z direction DECLARE_SOA_COLUMN(Mass, mass, float); //! mass of the conversion. Do NOT use for cut! } // namespace oftv0 +namespace pmd +{ +DECLARE_SOA_INDEX_COLUMN(BC, bc); //! BC index +DECLARE_SOA_COLUMN(X, pmdclsx, float); //! cluster x position +DECLARE_SOA_COLUMN(Y, pmdclsy, float); //! cluster y position +DECLARE_SOA_COLUMN(Z, pmdclsz, float); //! cluster z position +DECLARE_SOA_COLUMN(CluADC, pmdclsadc, float); //! cluster energy in ADC +DECLARE_SOA_COLUMN(CluPID, pmdclspid, float); //! cluster probability, 1: photon, 0:hadron +DECLARE_SOA_COLUMN(Det, pmddet, uint8_t); //! Detector, 0:PRE, 1:CPV +DECLARE_SOA_COLUMN(Ncell, pmdncell, uint8_t); //! cluster cells +DECLARE_SOA_COLUMN(Smn, pmdmodule, int32_t); //! module number +DECLARE_SOA_COLUMN(TrackNo, pmdtrackno, int32_t); //! Track number assigned to clus from simulation +DECLARE_SOA_COLUMN(TrackPid, pmdtrackpid, int32_t); //! Track PID assigned to clus from simulation +DECLARE_SOA_COLUMN(SigX, pmdsigx, float); //! Cluster x-width +DECLARE_SOA_COLUMN(SigY, pmdsigy, float); //! Cluster y-width +DECLARE_SOA_COLUMN(ClMatching, pmdclmatching, int32_t); //! 
Cluster of PRE matching with CPV +} // namespace pmd } // namespace run2 DECLARE_SOA_TABLE(Run2BCInfos_000, "AOD", "RUN2BCINFO", run2::EventCuts, //! Legacy information for Run 2 event selection @@ -1811,6 +1828,14 @@ DECLARE_SOA_TABLE(Run2OTFV0s, "AOD", "Run2OTFV0", //! Run 2 V0 on the fly table using Run2OTFV0 = Run2OTFV0s::iterator; +DECLARE_SOA_TABLE(Pmds, "AOD", "PMD", //! Photon information from PMD detector + o2::soa::Index<>, run2::pmd::BCId, run2::pmd::X, run2::pmd::Y, + run2::pmd::Z, run2::pmd::CluADC, run2::pmd::CluPID, run2::pmd::Det, + run2::pmd::Ncell, run2::pmd::Smn, run2::pmd::TrackNo, run2::pmd::TrackPid, + run2::pmd::SigX, run2::pmd::SigY, run2::pmd::ClMatching); + +using Pmd = Pmds::iterator; + // ---- MC tables ---- namespace mccollision { From 72b50c63aeeb24144bc186d2c4efabc56503cf55 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Thu, 24 Apr 2025 07:22:33 +0200 Subject: [PATCH 0292/1764] ITS: Fix GPU deterministic mode + refactoring (#14200) --- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 14 +- .../GPU/ITStrackingGPU/TrackerTraitsGPU.h | 29 +- .../GPU/ITStrackingGPU/TrackingKernels.h | 3 +- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 87 ++-- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 445 ++++++++---------- .../tracking/include/ITStracking/Tracker.h | 31 +- .../include/ITStracking/TrackerTraits.h | 30 +- .../include/ITStracking/TrackingInterface.h | 5 +- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 188 ++------ .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 24 +- .../ITS/tracking/src/TrackingInterface.cxx | 19 +- .../ITSMFT/ITS/workflow/src/TrackerSpec.cxx | 2 + 12 files changed, 317 insertions(+), 560 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 29d2404e98681..88666cdfdb7fb 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h 
@@ -15,18 +15,13 @@ #include "ITStracking/TimeFrame.h" #include "ITStracking/Configuration.h" - -#include "ITStrackingGPU/ClusterLinesGPU.h" #include "ITStrackingGPU/Utils.h" #include -namespace o2 -{ -namespace its -{ -namespace gpu +namespace o2::its::gpu { + class Stream; class DefaultGPUAllocator : public ExternalAllocator @@ -228,7 +223,6 @@ inline int TimeFrameGPU::getNumberOfCells() const return std::accumulate(mNCells.begin(), mNCells.end(), 0); } -} // namespace gpu -} // namespace its -} // namespace o2 +} // namespace o2::its::gpu + #endif diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h index f9583d97ca030..c765307473749 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h @@ -13,8 +13,6 @@ #ifndef ITSTRACKINGGPU_TRACKERTRAITSGPU_H_ #define ITSTRACKINGGPU_TRACKERTRAITSGPU_H_ -#include "ITStracking/Configuration.h" -#include "ITStracking/Definitions.h" #include "ITStracking/TrackerTraits.h" #include "ITStrackingGPU/TimeFrameGPU.h" @@ -24,28 +22,27 @@ namespace its { template -class TrackerTraitsGPU : public TrackerTraits +class TrackerTraitsGPU final : public TrackerTraits { public: TrackerTraitsGPU() = default; ~TrackerTraitsGPU() override = default; - // void computeLayerCells() final; - void adoptTimeFrame(TimeFrame* tf) override; - void initialiseTimeFrame(const int iteration) override; + void adoptTimeFrame(TimeFrame* tf) final; + void initialiseTimeFrame(const int iteration) final; + void computeLayerTracklets(const int iteration, int, int) final; - void computeLayerCells(const int iteration) override; - void setBz(float) override; - void findCellsNeighbours(const int iteration) override; - void findRoads(const int iteration) override; + void computeLayerCells(const int iteration) final; + void findCellsNeighbours(const int iteration) final; + void 
findRoads(const int iteration) final; + + bool supportsExtendTracks() const noexcept final { return false; } + bool supportsFindShortPrimaries() const noexcept final { return false; } - // Methods to get CPU execution from traits - void initialiseTimeFrameHybrid(const int iteration) override { initialiseTimeFrame(iteration); }; - void computeTrackletsHybrid(const int iteration, int, int) override; - void computeCellsHybrid(const int iteration) override; - void findCellsNeighboursHybrid(const int iteration) override; + void setBz(float) final; - void extendTracks(const int iteration) override; + const char* getName() const noexcept final { return "GPU"; } + bool isGPU() const noexcept final { return true; } // TimeFrameGPU information forwarding int getTFNumberOfClusters() const override; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index 720867ddaba29..21b14fd9292d2 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -176,8 +176,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nBlocks, const int nThreads); -int filterCellNeighboursHandler(std::vector&, - gpuPair*, +int filterCellNeighboursHandler(gpuPair*, int*, unsigned int); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 3c65faddcff71..f3b62ec8a6108 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -11,10 +11,7 @@ /// #include -#include -#include #include -#include #include "DataFormatsITS/TrackITS.h" @@ -41,54 +38,7 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) } template -void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int, int) -{ -} - -template -void 
TrackerTraitsGPU::computeLayerCells(const int iteration) -{ -} - -template -void TrackerTraitsGPU::findCellsNeighbours(const int iteration) -{ -} - -template -void TrackerTraitsGPU::extendTracks(const int iteration) -{ -} - -template -void TrackerTraitsGPU::setBz(float bz) -{ - mBz = bz; - mTimeFrameGPU->setBz(bz); -} - -template -int TrackerTraitsGPU::getTFNumberOfClusters() const -{ - return mTimeFrameGPU->getNumberOfClusters(); -} - -template -int TrackerTraitsGPU::getTFNumberOfTracklets() const -{ - return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0); -} - -template -int TrackerTraitsGPU::getTFNumberOfCells() const -{ - return mTimeFrameGPU->getNumberOfCells(); -} - -//////////////////////////////////////////////////////////////////////////////// -// Hybrid tracking -template -void TrackerTraitsGPU::computeTrackletsHybrid(const int iteration, int iROFslice, int iVertex) +void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int iROFslice, int iVertex) { auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); mTimeFrameGPU->createTrackletsLUTDevice(iteration); @@ -161,7 +111,7 @@ void TrackerTraitsGPU::computeTrackletsHybrid(const int iteration, int } template -void TrackerTraitsGPU::computeCellsHybrid(const int iteration) +void TrackerTraitsGPU::computeLayerCells(const int iteration) { mTimeFrameGPU->createCellsLUTDevice(); auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); @@ -208,7 +158,7 @@ void TrackerTraitsGPU::computeCellsHybrid(const int iteration) } template -void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) +void TrackerTraitsGPU::findCellsNeighbours(const int iteration) { mTimeFrameGPU->createNeighboursIndexTablesDevice(); auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); @@ -250,8 +200,7 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) conf.nBlocks, conf.nThreads); - 
filterCellNeighboursHandler(mTimeFrameGPU->getCellsNeighbours()[iLayer], - mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), + filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), mTimeFrameGPU->getDeviceNeighbours(iLayer), nNeigh); } @@ -270,9 +219,6 @@ void TrackerTraitsGPU::findRoads(const int iteration) if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; } - std::vector lastCellId, updatedCellId; - std::vector lastCellSeed, updatedCellSeed; - processNeighboursHandler(startLayer, startLevel, mTimeFrameGPU->getDeviceArrayCells(), @@ -366,5 +312,30 @@ void TrackerTraitsGPU::findRoads(const int iteration) } }; +template +int TrackerTraitsGPU::getTFNumberOfClusters() const +{ + return mTimeFrameGPU->getNumberOfClusters(); +} + +template +int TrackerTraitsGPU::getTFNumberOfTracklets() const +{ + return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0); +} + +template +int TrackerTraitsGPU::getTFNumberOfCells() const +{ + return mTimeFrameGPU->getNumberOfCells(); +} + +template +void TrackerTraitsGPU::setBz(float bz) +{ + mBz = bz; + mTimeFrameGPU->setBz(bz); +} + template class TrackerTraitsGPU<7>; } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 805e66675e1b9..bb39e9e70341b 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -12,11 +12,9 @@ #include #include -#include -#include #include -#include +#include #include #include #include @@ -28,7 +26,6 @@ #include #include "ITStracking/Constants.h" -#include "ITStracking/Configuration.h" #include "ITStracking/IndexTableUtils.h" #include "ITStracking/MathUtils.h" #include "DataFormatsITS/TrackITS.h" @@ -59,7 +56,7 @@ namespace o2::its using namespace constants::its2; using Vertex = o2::dataformats::Vertex>; -GPUd() float 
Sq(float v) +GPUdii() float Sq(float v) { return v * v; } @@ -76,15 +73,15 @@ GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerInde const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz; const float phiRangeMax = (maxdeltaphi > constants::math::Pi) ? constants::math::TwoPi : currentCluster.phi + maxdeltaphi; - if (zRangeMax < -LayersZCoordinate()[layerIndex + 1] || - zRangeMin > LayersZCoordinate()[layerIndex + 1] || zRangeMin > zRangeMax) { + if (zRangeMax < -utils.getLayerZ(layerIndex) || + zRangeMin > utils.getLayerZ(layerIndex) || zRangeMin > zRangeMax) { return getEmptyBinsRect(); } - return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex + 1, zRangeMin)), + return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex, zRangeMin)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)), - o2::gpu::CAMath::Min(ZBins - 1, utils.getZBinIndex(layerIndex + 1, zRangeMax)), + o2::gpu::CAMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex, zRangeMax)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; } @@ -184,6 +181,11 @@ struct equal_tracklets { GPUhd() bool operator()(const Tracklet& a, const Tracklet& b) { return a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex == b.secondClusterIndex; } }; +template +struct sort_by_second { + GPUhd() bool operator()(const gpuPair& a, const gpuPair& b) const { return a.second < b.second; } +}; + template struct pair_to_first { GPUhd() int operator()(const gpuPair& a) const @@ -522,7 +524,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel( const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) 
+ Sq(meanDeltaR * MSAngle))}; - const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; + const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { continue; } @@ -800,6 +802,44 @@ GPUg() void printCellSeeds(CellSeed* seed, int nCells, const unsigned int tId = } } } + +template +GPUhi() void cubExclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stream = nullptr) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaFree(d_temp_storage)); +} + +template +GPUhi() void cubExclusiveScanInPlace(Vector& in_out, int num_items, cudaStream_t stream = nullptr) +{ + cubExclusiveScanInPlace(thrust::raw_pointer_cast(in_out.data()), num_items, stream); +} + +template +GPUhi() void cubInclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stream = nullptr) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaFree(d_temp_storage)); +} + +template +GPUhi() void cubInclusiveScanInPlace(Vector& in_out, int num_items, cudaStream_t stream = nullptr) +{ + cubInclusiveScanInPlace(thrust::raw_pointer_cast(in_out.data()), num_items, stream); +} } // namespace gpu template @@ 
-833,7 +873,8 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>( + gpu::computeLayerTrackletsMultiROFKernel<<>>( utils, multMask, iLayer, @@ -860,22 +901,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, resolutions[iLayer], radii[iLayer + 1] - radii[iLayer], mulScatAng[iLayer]); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1); } } @@ -913,55 +939,42 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>(utils, - multMask, - iLayer, - startROF, - endROF, - maxROF, - deltaROF, - vertices, - rofPV, - nVertices, - vertexId, - clusters, - ROFClusters, - usedClusters, - clustersIndexTables, - tracklets, - trackletsLUTs, - iteration, - NSigmaCut, - phiCuts[iLayer], - resolutionPV, - minRs[iLayer + 1], - maxRs[iLayer + 1], - resolutions[iLayer], - radii[iLayer + 1] - radii[iLayer], - mulScatAng[iLayer]); + gpu::computeLayerTrackletsMultiROFKernel<<>>(utils, + multMask, + iLayer, + startROF, + endROF, 
+ maxROF, + deltaROF, + vertices, + rofPV, + nVertices, + vertexId, + clusters, + ROFClusters, + usedClusters, + clustersIndexTables, + tracklets, + trackletsLUTs, + iteration, + NSigmaCut, + phiCuts[iLayer], + resolutionPV, + minRs[iLayer + 1], + maxRs[iLayer + 1], + resolutions[iLayer], + radii[iLayer + 1] - radii[iLayer], + mulScatAng[iLayer]); thrust::device_ptr tracklets_ptr(spanTracklets[iLayer]); thrust::sort(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::sort_tracklets()); auto unique_end = thrust::unique(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::equal_tracklets()); nTracklets[iLayer] = unique_end - tracklets_ptr; if (iLayer > 0) { GPUChkErrS(cudaMemset(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int))); - gpu::compileTrackletsLookupTableKernel<<>>(spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::compileTrackletsLookupTableKernel<<>>( + spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); + gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1); } } } @@ -984,7 +997,8 @@ void countCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + 
gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -998,22 +1012,7 @@ void countCellsHandler( maxChi2ClusterAttachment, // const float cellDeltaTanLambdaSigma, // const float nSigmaCut); // const float - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - cellsLUTsHost, // d_in - cellsLUTsHost, // d_out - nTracklets + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - cellsLUTsHost, // d_in - cellsLUTsHost, // d_out - nTracklets + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::cubExclusiveScanInPlace(cellsLUTsHost, nTracklets + 1); } void computeCellsHandler( @@ -1034,7 +1033,8 @@ void computeCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -1064,7 +1064,8 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nBlocks, const int nThreads) { - gpu::computeLayerCellNeighboursKernel<<>>( + gpu::computeLayerCellNeighboursKernel<<>>( cellsLayersDevice, neighboursLUT, neighboursIndexTable, @@ -1076,39 +1077,10 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, nCells, maxCellNeighbours); - void *d_temp_storage = nullptr, *d_temp_storage_2 = nullptr; - size_t temp_storage_bytes = 0, temp_storage_bytes_2 = 0; - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // 
temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items - - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items - - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - temp_storage_bytes_2, // temp_storage_bytes - neighboursIndexTable, // d_in - neighboursIndexTable, // d_out - nCells + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - - GPUChkErrS(cudaMalloc(&d_temp_storage_2, temp_storage_bytes_2)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - temp_storage_bytes_2, // temp_storage_bytes - neighboursIndexTable, // d_in - neighboursIndexTable, // d_out - nCells + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer + gpu::cubInclusiveScanInPlace(neighboursLUT, nCellsNext); + gpu::cubExclusiveScanInPlace(neighboursIndexTable, nCells + 1); unsigned int nNeighbours; GPUChkErrS(cudaMemcpy(&nNeighbours, &neighboursLUT[nCellsNext - 1], sizeof(unsigned int), cudaMemcpyDeviceToHost)); - GPUChkErrS(cudaFree(d_temp_storage)); - GPUChkErrS(cudaFree(d_temp_storage_2)); return nNeighbours; } @@ -1143,32 +1115,18 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, GPUChkErrS(cudaDeviceSynchronize()); } -int filterCellNeighboursHandler(std::vector& neighHost, // TODO: eventually remove this! - gpuPair* cellNeighbourPairs, +int filterCellNeighboursHandler(gpuPair* cellNeighbourPairs, int* cellNeighbours, unsigned int nNeigh) { thrust::device_ptr> neighVectorPairs(cellNeighbourPairs); thrust::device_ptr validNeighs(cellNeighbours); - thrust::device_vector keys(nNeigh); // TODO: externally allocate. - thrust::device_vector vals(nNeigh); // TODO: externally allocate. 
- thrust::copy(thrust::make_transform_iterator(neighVectorPairs, gpu::pair_to_second()), - thrust::make_transform_iterator(neighVectorPairs + nNeigh, gpu::pair_to_second()), - keys.begin()); - thrust::sequence(vals.begin(), vals.end()); - thrust::sort_by_key(keys.begin(), keys.end(), vals.begin()); - thrust::device_vector> sortedNeigh(nNeigh); - thrust::copy(thrust::make_permutation_iterator(neighVectorPairs, vals.begin()), - thrust::make_permutation_iterator(neighVectorPairs, vals.end()), - sortedNeigh.begin()); - GPUChkErrS(cudaDeviceSynchronize()); - auto trimmedBegin = thrust::find_if(sortedNeigh.begin(), sortedNeigh.end(), gpu::is_valid_pair()); // trim leading -1s - auto trimmedSize = sortedNeigh.end() - trimmedBegin; - neighHost.resize(trimmedSize); - thrust::transform(trimmedBegin, sortedNeigh.end(), validNeighs, gpu::pair_to_first()); - GPUChkErrS(cudaMemcpy(neighHost.data(), cellNeighbours, trimmedSize * sizeof(int), cudaMemcpyDeviceToHost)); + auto updatedEnd = thrust::remove_if(neighVectorPairs, neighVectorPairs + nNeigh, gpu::is_invalid_pair()); + size_t newSize = updatedEnd - neighVectorPairs; + thrust::stable_sort(neighVectorPairs, neighVectorPairs + newSize, gpu::sort_by_second()); + thrust::transform(neighVectorPairs, neighVectorPairs + newSize, validNeighs, gpu::pair_to_first()); - return trimmedSize; + return newSize; } template @@ -1190,137 +1148,117 @@ void processNeighboursHandler(const int startLayer, const int nBlocks, const int nThreads) { - thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. TODO: fix this. 
- // thrust::device_vector lastCellIds(lastCellIdHost); - // thrust::device_vector lastCellSeed(lastCellSeedHost); - thrust::device_vector lastCellId, updatedCellId; - thrust::device_vector lastCellSeed, updatedCellSeed; - gpu::processNeighboursKernel<<>>(startLayer, - startLevel, - allCellSeeds, - currentCellSeeds, - nullptr, - nCells[startLayer], - nullptr, - nullptr, - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[startLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[startLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - - updatedCellId.resize(foundSeedsTable.back()); - updatedCellSeed.resize(foundSeedsTable.back()); - - gpu::processNeighboursKernel<<>>(startLayer, - startLevel, - allCellSeeds, - currentCellSeeds, - nullptr, - nCells[startLayer], - thrust::raw_pointer_cast(&updatedCellSeed[0]), - thrust::raw_pointer_cast(&updatedCellId[0]), - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - auto t1 = updatedCellSeed.size(); - 
GPUChkErrS(cudaFree(d_temp_storage)); + thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. + // TODO: fix this. + + gpu::processNeighboursKernel<<>>( + startLayer, + startLevel, + allCellSeeds, + currentCellSeeds, + nullptr, + nCells[startLayer], + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], + neighboursDeviceLUTs[startLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpu::cubExclusiveScanInPlace(foundSeedsTable, nCells[startLayer] + 1); + + thrust::device_vector updatedCellId(foundSeedsTable.back()); + thrust::device_vector updatedCellSeed(foundSeedsTable.back()); + gpu::processNeighboursKernel<<>>( + startLayer, + startLevel, + allCellSeeds, + currentCellSeeds, + nullptr, + nCells[startLayer], + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], + neighboursDeviceLUTs[startLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + int level = startLevel; + thrust::device_vector lastCellId; + thrust::device_vector lastCellSeed; for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { - temp_storage_bytes = 0; lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); thrust::device_vector().swap(updatedCellSeed); thrust::device_vector().swap(updatedCellId); auto lastCellSeedSize{lastCellSeed.size()}; - foundSeedsTable.resize(nCells[iLayer] + 1); + foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0); - --level; - gpu::processNeighboursKernel<<>>(iLayer, - level, - allCellSeeds, - thrust::raw_pointer_cast(&lastCellSeed[0]), - thrust::raw_pointer_cast(&lastCellId[0]), - 
lastCellSeedSize, - nullptr, - nullptr, - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer + + gpu::processNeighboursKernel<<>>( + iLayer, + --level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpu::cubExclusiveScanInPlace(foundSeedsTable, foundSeedsTable.size()); + auto foundSeeds{foundSeedsTable.back()}; updatedCellId.resize(foundSeeds); thrust::fill(updatedCellId.begin(), updatedCellId.end(), 0); updatedCellSeed.resize(foundSeeds); thrust::fill(updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); - gpu::processNeighboursKernel<<>>(iLayer, - level, - allCellSeeds, - thrust::raw_pointer_cast(&lastCellSeed[0]), - thrust::raw_pointer_cast(&lastCellId[0]), - lastCellSeedSize, - thrust::raw_pointer_cast(&updatedCellSeed[0]), - thrust::raw_pointer_cast(&updatedCellId[0]), 
- thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::processNeighboursKernel<<>>( + iLayer, + level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); } + thrust::device_vector outSeeds(updatedCellSeed.size()); auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); auto s{end - outSeeds.begin()}; std::vector outSeedsHost(s); - thrust::copy(updatedCellSeed.begin(), updatedCellSeed.begin() + s, outSeedsHost.begin()); + thrust::copy(outSeeds.begin(), outSeeds.begin() + s, outSeedsHost.begin()); seedsHost.insert(seedsHost.end(), outSeedsHost.begin(), outSeedsHost.end()); } @@ -1339,7 +1277,8 @@ void trackSeedHandler(CellSeed* trackSeeds, const int nThreads) { thrust::device_vector minPts(minPtsHost); - gpu::fitTrackSeedsKernel<<>>( + gpu::fitTrackSeedsKernel<<>>( trackSeeds, // CellSeed* foundTrackingFrameInfo, // TrackingFrameInfo** tracks, // TrackITSExt* diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index 58483e4aa9f6f..8f0a471b40c59 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -50,21 +50,14 @@ class TrackerTraits; class Tracker { + using LogFunc = std::function; public: 
Tracker(TrackerTraits* traits); - Tracker(const Tracker&) = delete; - Tracker& operator=(const Tracker&) = delete; - ~Tracker(); - void adoptTimeFrame(TimeFrame& tf); - void clustersToTracks( - std::function = [](std::string s) { std::cout << s << std::endl; }, std::function = [](std::string s) { std::cerr << s << std::endl; }); - void clustersToTracksHybrid( - std::function = [](std::string s) { std::cout << s << std::endl; }, std::function = [](std::string s) { std::cerr << s << std::endl; }); - std::vector& getTracks(); + void clustersToTracks(LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); void setParameters(const std::vector&); std::vector& getParameters() { return mTrkParams; } @@ -74,7 +67,7 @@ class Tracker bool isMatLUT() const; void setNThreads(int n); int getNThreads() const; - std::uint32_t mTimeFrameCounter = 0; + void printSummary() const; private: void initialiseTimeFrame(int& iteration); @@ -82,16 +75,7 @@ class Tracker void computeCells(int& iteration); void findCellsNeighbours(int& iteration); void findRoads(int& iteration); - - void initialiseTimeFrameHybrid(int& iteration); - void computeTrackletsHybrid(int& iteration, int& iROFslice, int& iVertex); - void computeCellsHybrid(int& iteration); - void findCellsNeighboursHybrid(int& iteration); - void findRoadsHybrid(int& iteration); - void findTracksHybrid(int& iteration); - void findShortPrimaries(); - void findTracks(); void extendTracks(int& iteration); // MC interaction @@ -100,7 +84,7 @@ class Tracker void rectifyClusterIndices(); template - float evaluateTask(void (Tracker::*)(T...), const char*, std::function logger, T&&... args); + float evaluateTask(void (Tracker::*)(T...), const char*, LogFunc logger, T&&... 
args); TrackerTraits* mTraits = nullptr; /// Observer pointer, not owned by this class TimeFrame* mTimeFrame = nullptr; /// Observer pointer, not owned by this class @@ -108,7 +92,9 @@ class Tracker std::vector mTrkParams; o2::gpu::GPUChainITS* mRecoChain = nullptr; - unsigned int mNumberOfRuns{0}; + unsigned int mNumberOfDroppedTFs{0}; + unsigned int mTimeFrameCounter{0}; + double mTotalTime{0}; }; inline void Tracker::setParameters(const std::vector& trkPars) @@ -117,8 +103,7 @@ inline void Tracker::setParameters(const std::vector& trkPar } template -float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, std::function logger, - T&&... args) +float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, LogFunc logger, T&&... args) { float diff{0.f}; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index 46499db92d4d5..6b514c6e8d000 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -16,23 +16,12 @@ #ifndef TRACKINGITSU_INCLUDE_TRACKERTRAITS_H_ #define TRACKINGITSU_INCLUDE_TRACKERTRAITS_H_ -#include -#include #include -#include -#include -#include -#include -#include -#include #include "DetectorsBase/Propagator.h" -#include "DetectorsBase/MatLayerCylSet.h" #include "ITStracking/Configuration.h" -#include "ITStracking/Definitions.h" #include "ITStracking/MathUtils.h" #include "ITStracking/TimeFrame.h" -#include "ITStracking/Road.h" // #define OPTIMISATION_OUTPUT @@ -52,30 +41,29 @@ class TrackerTraits virtual ~TrackerTraits() = default; virtual void adoptTimeFrame(TimeFrame* tf); virtual void initialiseTimeFrame(const int iteration); + virtual void computeLayerTracklets(const int iteration, int iROFslice, int iVertex); virtual void computeLayerCells(const int iteration); virtual void findCellsNeighbours(const int iteration); 
virtual void findRoads(const int iteration); - virtual void initialiseTimeFrameHybrid(const int iteration) { LOGP(error, "initialiseTimeFrameHybrid: this method should never be called with CPU traits"); } - virtual void computeTrackletsHybrid(const int iteration, int, int) { LOGP(error, "computeTrackletsHybrid: this method should never be called with CPU traits"); } - virtual void computeCellsHybrid(const int iteration) { LOGP(error, "computeCellsHybrid: this method should never be called with CPU traits"); } - virtual void findCellsNeighboursHybrid(const int iteration) { LOGP(error, "findCellsNeighboursHybrid: this method should never be called with CPU traits"); } - virtual void findRoadsHybrid(const int iteration) { LOGP(error, "findRoadsHybrid: this method should never be called with CPU traits"); } - virtual void findTracksHybrid(const int iteration) { LOGP(error, "findTracksHybrid: this method should never be called with CPU traits"); } - virtual void findTracks() { LOGP(error, "findTracks: this method is deprecated."); } + + virtual bool supportsExtendTracks() const noexcept { return true; } virtual void extendTracks(const int iteration); + virtual bool supportsFindShortPrimaries() const noexcept { return true; } virtual void findShortPrimaries(); - virtual void setBz(float bz); + virtual bool trackFollowing(TrackITSExt* track, int rof, bool outward, const int iteration); virtual void processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeed, std::vector& updatedCellId); void UpdateTrackingParameters(const std::vector& trkPars); TimeFrame* getTimeFrame() { return mTimeFrame; } - void setIsGPU(const unsigned char isgpu) { mIsGPU = isgpu; }; + virtual void setBz(float bz); float getBz() const; void setCorrType(const o2::base::PropagatorImpl::MatCorrType type) { mCorrType = type; } bool isMatLUT() const; + virtual const char* getName() const noexcept { return "CPU"; } + virtual 
bool isGPU() const noexcept { return false; } // Others GPUhd() static consteval int4 getEmptyBinsRect() { return int4{0, 0, 0, 0}; } @@ -109,13 +97,11 @@ class TrackerTraits o2::gpu::GPUChainITS* mChain = nullptr; TimeFrame* mTimeFrame; std::vector mTrkParams; - bool mIsGPU = false; }; inline void TrackerTraits::initialiseTimeFrame(const int iteration) { mTimeFrame->initialise(iteration, mTrkParams[iteration], mTrkParams[iteration].NLayers); - setIsGPU(false); } inline float TrackerTraits::getBz() const diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h index b584bf6b8008b..6eacb94ebb1ea 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h @@ -37,9 +37,7 @@ class ITSTrackingInterface const bool overrBeamEst) : mIsMC{isMC}, mUseTriggers{trgType}, - mOverrideBeamEstimation{overrBeamEst} - { - } + mOverrideBeamEstimation{overrBeamEst} {} void setClusterDictionary(const o2::itsmft::TopologyDictionary* d) { mDict = d; } void setMeanVertex(const o2::dataformats::MeanVertexObject* v) @@ -56,6 +54,7 @@ class ITSTrackingInterface void initialise(); template void run(framework::ProcessingContext& pc); + void printSummary() const; virtual void updateTimeDependentParams(framework::ProcessingContext& pc); virtual void finaliseCCDB(framework::ConcreteDataMatcher& matcher, void* obj); diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index 50dc1f5dfd039..c23ba0576c625 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -25,8 +25,7 @@ #include "ReconstructionDataFormats/Track.h" #include -#include -#include +#include #include #include #include @@ -37,17 +36,16 @@ namespace its { using o2::its::constants::GB; -Tracker::Tracker(o2::its::TrackerTraits* traits) 
+Tracker::Tracker(o2::its::TrackerTraits* traits) : mTraits(traits) { /// Initialise standard configuration with 1 iteration mTrkParams.resize(1); - mTraits = traits; } -Tracker::~Tracker() = default; - -void Tracker::clustersToTracks(std::function logger, std::function error) +void Tracker::clustersToTracks(LogFunc logger, LogFunc error) { + LogFunc evalLog = [](const std::string&) {}; + double total{0}; mTraits->UpdateTrackingParameters(mTrkParams); int maxNvertices{-1}; @@ -62,22 +60,20 @@ void Tracker::clustersToTracks(std::function logger, std::f if (iteration == 3 && mTrkParams[0].DoUPCIteration) { mTimeFrame->swapMasks(); } - logger(fmt::format("ITS Tracking iteration {} summary:", iteration)); double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - - total += evaluateTask(&Tracker::initialiseTimeFrame, "Timeframe initialisation", logger, iteration); int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? 
mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; int iVertex{std::min(maxNvertices, 0)}; + logger(std::format("==== ITS {} Tracking iteration {} summary ====", mTraits->getName(), iteration)); + total += evaluateTask(&Tracker::initialiseTimeFrame, "Timeframe initialisation", logger, iteration); do { for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask( - &Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); + timeTracklets += evaluateTask(&Tracker::computeTracklets, "Tracklet finding", evalLog, iteration, iROFs, iVertex); nTracklets += mTraits->getTFNumberOfTracklets(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + error(std::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); if (mTrkParams[iteration].DropTFUponFailure) { dropTF = true; @@ -86,17 +82,16 @@ void Tracker::clustersToTracks(std::function logger, std::f } float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(fmt::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. 
Current limit is {}", + error(std::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit)); break; } - timeCells += evaluateTask( - &Tracker::computeCells, "Cell finding", [](std::string) {}, iteration); + timeCells += evaluateTask(&Tracker::computeCells, "Cell finding", evalLog, iteration); nCells += mTraits->getTFNumberOfCells(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + error(std::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); if (mTrkParams[iteration].DropTFUponFailure) { dropTF = true; @@ -105,131 +100,53 @@ void Tracker::clustersToTracks(std::function logger, std::f } float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", + error(std::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. 
Current limit is {}", cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit)); break; } - timeNeighbours += evaluateTask( - &Tracker::findCellsNeighbours, "Neighbour finding", [](std::string) {}, iteration); + timeNeighbours += evaluateTask(&Tracker::findCellsNeighbours, "Neighbour finding", evalLog, iteration); nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask( - &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); + timeRoads += evaluateTask(&Tracker::findRoads, "Road finding", evalLog, iteration); } iVertex++; } while (iVertex < maxNvertices && !dropTF); - logger(fmt::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(fmt::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(fmt::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(fmt::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); + logger(std::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); + logger(std::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); + logger(std::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); + logger(std::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); total += timeTracklets + timeCells + timeNeighbours + timeRoads; - if (mTrkParams[iteration].UseTrackFollower) { + if (mTraits->supportsExtendTracks() && mTrkParams[iteration].UseTrackFollower && !dropTF) { int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; auto timeExtending = evaluateTask(&Tracker::extendTracks, "Extending tracks", [](const std::string&) {}, iteration); total += 
timeExtending; - logger(fmt::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); + logger(std::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); } if (dropTF) { - error(fmt::format("...Dropping Timeframe...")); + error("...Dropping Timeframe..."); mTimeFrame->dropTracks(); - break; // breaking out the iterations loop + ++mNumberOfDroppedTFs; + return; } } - total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); - - std::stringstream sstream; - if constexpr (constants::DoTimeBenchmarks) { - sstream << std::setw(2) << " - " - << "Timeframe " << mTimeFrameCounter++ << " processing completed in: " << total << "ms using " << mTraits->getNThreads() << " threads."; + if (mTraits->supportsFindShortPrimaries() && mTrkParams[0].FindShortTracks) { + auto nTracksB = mTimeFrame->getNumberOfTracks(); + total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); + auto nTracksA = mTimeFrame->getNumberOfTracks(); + logger(std::format(" `-> found {} additional tracks", nTracksA - nTracksB)); } - logger(sstream.str()); - if (mTimeFrame->hasMCinformation()) { - computeTracksMClabels(); - } - rectifyClusterIndices(); - mNumberOfRuns++; -} - -void Tracker::clustersToTracksHybrid(std::function logger, std::function error) -{ - double total{0.}; - mTraits->UpdateTrackingParameters(mTrkParams); - int maxNvertices{-1}; - if (mTrkParams[0].PerPrimaryVertexProcessing) { - for (int iROF{0}; iROF < mTimeFrame->getNrof(); ++iROF) { - maxNvertices = std::max(maxNvertices, (int)mTimeFrame->getPrimaryVertices(iROF).size()); - } - } - - for (int iteration = 0; iteration < (int)mTrkParams.size(); ++iteration) { - int 
nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; - logger(fmt::format("=========== ITS Hybrid Tracking iteration {} summary ===========", iteration, nROFsIterations, maxNvertices)); - double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; - int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - - total += evaluateTask(&Tracker::initialiseTimeFrameHybrid, "Hybrid Timeframe initialisation", logger, iteration); - int iVertex{std::min(maxNvertices, 0)}; - - do { - for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask( - &Tracker::computeTrackletsHybrid, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); - nTracklets += mTraits->getTFNumberOfTracklets(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during trackleting in iteration {}, check the detector status and/or the selections.", iteration)); - break; - } - float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; - if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(fmt::format("Too many tracklets per cluster ({}) in iteration {}, check the detector status and/or the selections. 
Current limit is {}", trackletsPerCluster, iteration, mTrkParams[iteration].TrackletsPerClusterLimit)); - break; - } - - timeCells += evaluateTask( - &Tracker::computeCellsHybrid, "Cell finding", [](std::string) {}, iteration); - nCells += mTraits->getTFNumberOfCells(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during cell finding in iteration {}, check the detector status and/or the selections.", iteration)); - break; - } - float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; - if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, mTrkParams[iteration].CellsPerClusterLimit)); - break; - } - - timeNeighbours += evaluateTask( - &Tracker::findCellsNeighboursHybrid, "Neighbour finding", [](std::string) {}, iteration); - nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask( - &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); - } - iVertex++; - } while (iVertex < maxNvertices); - logger(fmt::format(" - Hybrid tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(fmt::format(" - Hybrid cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(fmt::format(" - Hybrid neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(fmt::format(" - Hybrid track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); - total += timeTracklets + timeCells + timeNeighbours + timeRoads; - // total += evaluateTask(&Tracker::extendTracks, "Hybrid extending tracks", logger, iteration); - } - - // total += evaluateTask(&Tracker::findShortPrimaries, "Hybrid short primaries finding", 
logger); - - std::stringstream sstream; if constexpr (constants::DoTimeBenchmarks) { - sstream << std::setw(2) << " - " - << "Timeframe " << mTimeFrameCounter++ << " processing completed in: " << total << "ms using " << mTraits->getNThreads() << " threads."; + logger(std::format("=== TimeFrame {} processing completed in: {:.2f} ms using {} thread(s) ===", mTimeFrameCounter, total, mTraits->getNThreads())); } - logger(sstream.str()); if (mTimeFrame->hasMCinformation()) { computeTracksMClabels(); } rectifyClusterIndices(); - mNumberOfRuns++; + ++mTimeFrameCounter; + mTotalTime += total; } void Tracker::initialiseTimeFrame(int& iteration) @@ -257,41 +174,6 @@ void Tracker::findRoads(int& iteration) mTraits->findRoads(iteration); } -void Tracker::initialiseTimeFrameHybrid(int& iteration) -{ - mTraits->initialiseTimeFrameHybrid(iteration); -} - -void Tracker::computeTrackletsHybrid(int& iteration, int& iROFslice, int& iVertex) -{ - mTraits->computeTrackletsHybrid(iteration, iROFslice, iVertex); // placeholder for the proper ROF/vertex slicing -} - -void Tracker::computeCellsHybrid(int& iteration) -{ - mTraits->computeCellsHybrid(iteration); -} - -void Tracker::findCellsNeighboursHybrid(int& iteration) -{ - mTraits->findCellsNeighboursHybrid(iteration); -} - -void Tracker::findRoadsHybrid(int& iteration) -{ - mTraits->findRoadsHybrid(iteration); -} - -void Tracker::findTracksHybrid(int& iteration) -{ - mTraits->findTracksHybrid(iteration); -} - -void Tracker::findTracks() -{ - mTraits->findTracks(); -} - void Tracker::extendTracks(int& iteration) { mTraits->extendTracks(iteration); @@ -575,5 +457,11 @@ int Tracker::getNThreads() const { return mTraits->getNThreads(); } + +void Tracker::printSummary() const +{ + LOGP(info, "Tracker summary: Processed {} TFs (dropped {}) in TOT={:.2f} s, AVG/TF={:.2f} s", mTimeFrameCounter, mNumberOfDroppedTFs, mTotalTime * 1.e-3, mTotalTime * 1.e-3 / ((mTimeFrameCounter > 0) ? 
(double)mTimeFrameCounter : -1.0)); +} + } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 8dcb7bfd315c1..987e8e3128fb4 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -19,7 +19,9 @@ #include #include -#include +#ifdef OPTIMISATION_OUTPUT +#include +#endif #include "CommonConstants/MathConstants.h" #include "DetectorsBase/Propagator.h" @@ -38,7 +40,7 @@ using o2::base::PropagatorF; namespace { -float Sq(float q) +inline float Sq(float q) { return q * q; } @@ -57,7 +59,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in #ifdef OPTIMISATION_OUTPUT static int iter{0}; - std::ofstream off(fmt::format("tracklets{}.txt", iter++)); + std::ofstream off(std::format("tracklets{}.txt", iter++)); #endif for (int iLayer = 0; iLayer < mTrkParams[iteration].TrackletsPerRoad(); ++iLayer) { @@ -173,7 +175,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in break; } } - off << fmt::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, label.isValid(), (tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate) / sigmaZ, tanLambda, resolution, sigmaZ) << std::endl; + off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, label.isValid(), (tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate) / sigmaZ, tanLambda, resolution, sigmaZ) << std::endl; #endif if (deltaZ / sigmaZ < mTrkParams[iteration].NSigmaCut && @@ -270,7 +272,7 @@ void TrackerTraits::computeLayerCells(const int iteration) { #ifdef OPTIMISATION_OUTPUT static int iter{0}; - std::ofstream off(fmt::format("cells{}.txt", iter++)); + std::ofstream off(std::format("cells{}.txt", iter++)); #endif for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) { 
@@ -318,7 +320,7 @@ void TrackerTraits::computeLayerCells(const int iteration) #ifdef OPTIMISATION_OUTPUT bool good{tf->getTrackletsLabel(iLayer)[iTracklet] == tf->getTrackletsLabel(iLayer + 1)[iNextTracklet]}; float signedDelta{currentTracklet.tanLambda - nextTracklet.tanLambda}; - off << fmt::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, good, signedDelta, signedDelta / (mTrkParams[iteration].CellDeltaTanLambdaSigma), tanLambda, resolution) << std::endl; + off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, good, signedDelta, signedDelta / (mTrkParams[iteration].CellDeltaTanLambdaSigma), tanLambda, resolution) << std::endl; #endif if (deltaTanLambda / mTrkParams[iteration].CellDeltaTanLambdaSigma < mTrkParams[iteration].NSigmaCut) { @@ -402,7 +404,7 @@ void TrackerTraits::computeLayerCells(const int iteration) void TrackerTraits::findCellsNeighbours(const int iteration) { #ifdef OPTIMISATION_OUTPUT - std::ofstream off(fmt::format("cellneighs{}.txt", iteration)); + std::ofstream off(std::format("cellneighs{}.txt", iteration)); #endif for (int iLayer{0}; iLayer < mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) { const int nextLayerCellsNum{static_cast(mTimeFrame->getCells()[iLayer + 1].size())}; @@ -439,7 +441,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) #ifdef OPTIMISATION_OUTPUT bool good{mTimeFrame->getCellsLabel(iLayer)[iCell] == mTimeFrame->getCellsLabel(iLayer + 1)[iNextCell]}; - off << fmt::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl; + off << std::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl; #endif if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) { @@ -469,6 +471,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) void TrackerTraits::processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeeds, std::vector& updatedCellsIds) { + bool print = iLayer == 3 && iLevel == 2; if (iLevel < 2 || iLayer < 1) { std::cout << 
"Error: layer " << iLayer << " or level " << iLevel << " cannot be processed by processNeighbours" << std::endl; exit(1); @@ -723,10 +726,7 @@ void TrackerTraits::extendTracks(const int iteration) void TrackerTraits::findShortPrimaries() { - if (!mTrkParams[0].FindShortTracks) { - return; - } - auto propagator = o2::base::Propagator::Instance(); + const auto propagator = o2::base::Propagator::Instance(); mTimeFrame->fillPrimaryVerticesXandAlpha(); for (auto& cell : mTimeFrame->getCells()[0]) { diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 613402ce56e97..f0dad2722a301 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -310,18 +310,10 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) mTimeFrame->setMultiplicityCutMask(processingMask); mTimeFrame->setROFMask(processUPCMask); // Run CA tracker - if constexpr (isGPU) { - if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { - mTracker->clustersToTracksHybrid(logger, fatalLogger); - } else { - mTracker->clustersToTracksHybrid(logger, errorLogger); - } + if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { + mTracker->clustersToTracks(logger, fatalLogger); } else { - if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { - mTracker->clustersToTracks(logger, fatalLogger); - } else { - mTracker->clustersToTracks(logger, errorLogger); - } + mTracker->clustersToTracks(logger, errorLogger); } size_t totTracks{mTimeFrame->getNumberOfTracks()}, totClusIDs{mTimeFrame->getNumberOfUsedClusters()}; allTracks.reserve(totTracks); @@ -438,6 +430,11 @@ void ITSTrackingInterface::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) } } +void ITSTrackingInterface::printSummary() const +{ + 
mTracker->printSummary(); +} + void ITSTrackingInterface::setTraitsFromProvider(VertexerTraits* vertexerTraits, TrackerTraits* trackerTraits, TimeFrame* frame) diff --git a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx index 9e4c98ad6e9a1..abbb88aea42fa 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx @@ -48,6 +48,7 @@ void TrackerDPL::init(InitContext& ic) void TrackerDPL::stop() { + mITSTrackingInterface.printSummary(); LOGF(info, "CPU Reconstruction total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); } @@ -69,6 +70,7 @@ void TrackerDPL::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) void TrackerDPL::endOfStream(EndOfStreamContext& ec) { + mITSTrackingInterface.printSummary(); LOGF(info, "ITS CA-Tracker total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); } From 7175de4628fc409f267ae9eb3ebd2ccbe163b569 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Thu, 24 Apr 2025 09:15:53 +0200 Subject: [PATCH 0293/1764] Fixing member variable naming (#14217) * Fixing member variable naming * Please consider the following formatting changes * Changing to mPImplOrt --------- Co-authored-by: ALICE Action Bot --- Common/ML/include/ML/OrtInterface.h | 30 +-- Common/ML/src/OrtInterface.cxx | 250 +++++++++--------- .../Global/GPUChainTrackingClusterizer.cxx | 172 ++++++------ .../TPCClusterFinder/GPUTPCNNClusterizer.cxx | 78 +++--- .../TPCClusterFinder/GPUTPCNNClusterizer.h | 56 ++-- .../GPUTPCNNClusterizerHost.cxx | 82 +++--- .../GPUTPCNNClusterizerHost.h | 12 +- .../GPUTPCNNClusterizerKernels.cxx | 210 +++++++-------- 8 files changed, 444 insertions(+), 446 deletions(-) diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index ea70e28c0421c..b4f40f3f5c694 100644 --- 
a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -70,23 +70,23 @@ class OrtModel Ort::SessionOptions* getSessionOptions(); Ort::MemoryInfo* getMemoryInfo(); Ort::Env* getEnv(); - int32_t getIntraOpNumThreads() const { return intraOpNumThreads; } - int32_t getInterOpNumThreads() const { return interOpNumThreads; } + int32_t getIntraOpNumThreads() const { return mIntraOpNumThreads; } + int32_t getInterOpNumThreads() const { return mInterOpNumThreads; } // Setters - void setDeviceId(int32_t id) { deviceId = id; } + void setDeviceId(int32_t id) { mDeviceId = id; } void setIO(); - void setActiveThreads(int threads) { intraOpNumThreads = threads; } + void setActiveThreads(int threads) { mIntraOpNumThreads = threads; } void setIntraOpNumThreads(int threads) { - if (deviceType == "CPU") { - intraOpNumThreads = threads; + if (mDeviceType == "CPU") { + mIntraOpNumThreads = threads; } } void setInterOpNumThreads(int threads) { - if (deviceType == "CPU") { - interOpNumThreads = threads; + if (mDeviceType == "CPU") { + mInterOpNumThreads = threads; } } void setEnv(Ort::Env*); @@ -113,19 +113,19 @@ class OrtModel private: // ORT variables -> need to be hidden as pImpl struct OrtVariables; - OrtVariables* pImplOrt; + OrtVariables* mPImplOrt; // Input & Output specifications of the loaded network - std::vector inputNamesChar, outputNamesChar; + std::vector mInputNamesChar, mOutputNamesChar; std::vector mInputNames, mOutputNames; - std::vector> mInputShapes, mOutputShapes, inputShapesCopy, outputShapesCopy; // Input shapes - std::vector inputSizePerNode, outputSizePerNode; // Output shapes - int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs + std::vector> mInputShapes, mOutputShapes, mInputShapesCopy, mOutputShapesCopy; // Input shapes + std::vector mInputSizePerNode, mOutputSizePerNode; // Output shapes + int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs // Environment settings bool 
mInitialized = false; - std::string modelPath, envName = "", deviceType = "CPU", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda - int32_t intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = -1, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; + std::string mModelPath, mEnvName = "", mDeviceType = "CPU", mThreadAffinity = ""; // device options should be cpu, rocm, migraphx, cuda + int32_t mIntraOpNumThreads = 1, mInterOpNumThreads = 1, mDeviceId = -1, mEnableProfiling = 0, mLoggingLevel = 0, mAllocateDeviceMemory = 0, mEnableOptimizations = 0; std::string printShape(const std::vector&); std::string printShape(const std::vector>&, std::vector&); diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index a8a20b11f9e64..df7f0a2deba82 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -41,7 +41,7 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c // General purpose void OrtModel::initOptions(std::unordered_map optionsMap) { - pImplOrt = new OrtVariables(); + mPImplOrt = new OrtVariables(); // Load from options map if (!optionsMap.contains("model-path")) { @@ -49,49 +49,49 @@ void OrtModel::initOptions(std::unordered_map optionsM } if (!optionsMap["model-path"].empty()) { - modelPath = optionsMap["model-path"]; - deviceType = (optionsMap.contains("device-type") ? optionsMap["device-type"] : "CPU"); - deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : -1); - allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); - intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); - interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0); - loggingLevel = (optionsMap.contains("logging-level") ? 
std::stoi(optionsMap["logging-level"]) : 0); - enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); - enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0); - envName = (optionsMap.contains("onnx-environment-name") ? optionsMap["onnx-environment-name"] : "onnx_model_inference"); - - if (deviceType == "CPU") { - (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads); - (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads); - if (intraOpNumThreads > 1 || interOpNumThreads > 1) { - (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); - } else if (intraOpNumThreads == 1) { - (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); + mModelPath = optionsMap["model-path"]; + mDeviceType = (optionsMap.contains("device-type") ? optionsMap["device-type"] : "CPU"); + mDeviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : -1); + mAllocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); + mIntraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); + mInterOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0); + mLoggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0); + mEnableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); + mEnableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0); + mEnvName = (optionsMap.contains("onnx-environment-name") ? 
optionsMap["onnx-environment-name"] : "onnx_model_inference"); + + if (mDeviceType == "CPU") { + (mPImplOrt->sessionOptions).SetIntraOpNumThreads(mIntraOpNumThreads); + (mPImplOrt->sessionOptions).SetInterOpNumThreads(mInterOpNumThreads); + if (mIntraOpNumThreads > 1 || mInterOpNumThreads > 1) { + (mPImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); + } else if (mIntraOpNumThreads == 1) { + (mPImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); } - if (loggingLevel < 2) { - LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads"; + if (mLoggingLevel < 2) { + LOG(info) << "(ORT) CPU execution provider set with " << mIntraOpNumThreads << " (mIntraOpNumThreads) and " << mInterOpNumThreads << " (mInterOpNumThreads) threads"; } } // OrtROCMProviderOptions rocm_options{}; - // (pImplOrt->sessionOptions).AppendExecutionProvider_ROCM(rocm_options); + // (mPImplOrt->sessionOptions).AppendExecutionProvider_ROCM(rocm_options); - (pImplOrt->sessionOptions).DisableMemPattern(); - (pImplOrt->sessionOptions).DisableCpuMemArena(); + (mPImplOrt->sessionOptions).DisableMemPattern(); + (mPImplOrt->sessionOptions).DisableCpuMemArena(); - if (enableProfiling) { + if (mEnableProfiling) { if (optionsMap.contains("profiling-output-path")) { - (pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str()); + (mPImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str()); } else { LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. 
Disabling profiling for now."; - (pImplOrt->sessionOptions).DisableProfiling(); + (mPImplOrt->sessionOptions).DisableProfiling(); } } else { - (pImplOrt->sessionOptions).DisableProfiling(); + (mPImplOrt->sessionOptions).DisableProfiling(); } - (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations)); - (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel)); + (mPImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(mEnableOptimizations)); + (mPImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(mLoggingLevel)); mInitialized = true; } else { @@ -101,9 +101,9 @@ void OrtModel::initOptions(std::unordered_map optionsM void OrtModel::initEnvironment() { - pImplOrt->env = std::make_shared( - OrtLoggingLevel(loggingLevel), - (envName.empty() ? "ORT" : envName.c_str()), + mPImplOrt->env = std::make_shared( + OrtLoggingLevel(mLoggingLevel), + (mEnvName.empty() ? "ORT" : mEnvName.c_str()), // Integrate ORT logging into Fairlogger [](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) { if (severity == ORT_LOGGING_LEVEL_VERBOSE) { @@ -121,20 +121,20 @@ void OrtModel::initEnvironment() } }, (void*)3); - (pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events + (mPImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events } void OrtModel::initSession() { - if (allocateDeviceMemory) { - memoryOnDevice(deviceId); + if (mAllocateDeviceMemory) { + memoryOnDevice(mDeviceId); } - pImplOrt->session = std::make_shared(*pImplOrt->env, modelPath.c_str(), pImplOrt->sessionOptions); - pImplOrt->ioBinding = std::make_unique(*pImplOrt->session); + mPImplOrt->session = std::make_shared(*mPImplOrt->env, mModelPath.c_str(), mPImplOrt->sessionOptions); + mPImplOrt->ioBinding = std::make_unique(*mPImplOrt->session); setIO(); - if (loggingLevel < 2) { + if (mLoggingLevel < 2) { LOG(info) << "(ORT) Model 
loaded successfully! (inputs: " << printShape(mInputShapes, mInputNames) << ", outputs: " << printShape(mOutputShapes, mInputNames) << ")"; } } @@ -142,47 +142,47 @@ void OrtModel::initSession() void OrtModel::memoryOnDevice(int32_t deviceIndex) { if (deviceIndex >= 0) { - (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); - (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h - (pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time - (pImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + (mPImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); + (mPImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h + (mPImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + (mPImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile 
memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time // Arena memory shrinkage comes at performance cost /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; - // (pImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 + // (mPImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 std::string dev_mem_str = ""; - if (deviceType == "ROCM") { + if (mDeviceType == "ROCM") { dev_mem_str = "Hip"; } - if (deviceType == "CUDA") { + if (mDeviceType == "CUDA") { dev_mem_str = "Cuda"; } - pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault); - if (loggingLevel < 2) { - LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt; + mPImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault); + if (mLoggingLevel < 2) { + LOG(info) << "(ORT) Memory info set to on-device memory for device type " << mDeviceType << " with ID " << deviceIndex << " and mPImplOrt pointer " << mPImplOrt; } } } void OrtModel::resetSession() { - 
pImplOrt->session = std::make_shared(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions); + mPImplOrt->session = std::make_shared(*(mPImplOrt->env), mModelPath.c_str(), mPImplOrt->sessionOptions); } // Getters Ort::SessionOptions* OrtModel::getSessionOptions() { - return &pImplOrt->sessionOptions; + return &mPImplOrt->sessionOptions; } Ort::MemoryInfo* OrtModel::getMemoryInfo() { - return &pImplOrt->memoryInfo; + return &mPImplOrt->memoryInfo; } Ort::Env* OrtModel::getEnv() { - return (pImplOrt->env).get(); + return (mPImplOrt->env).get(); } template @@ -202,37 +202,37 @@ std::vector OrtModel::v2v(std::vector& input, bool clearInput) void OrtModel::setIO() { - for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { - mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get()); + for (size_t i = 0; i < (mPImplOrt->session)->GetInputCount(); ++i) { + mInputNames.push_back((mPImplOrt->session)->GetInputNameAllocated(i, mPImplOrt->allocator).get()); } - for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { - mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); + for (size_t i = 0; i < (mPImplOrt->session)->GetInputCount(); ++i) { + mInputShapes.emplace_back((mPImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); } - for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { - mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get()); + for (size_t i = 0; i < (mPImplOrt->session)->GetOutputCount(); ++i) { + mOutputNames.push_back((mPImplOrt->session)->GetOutputNameAllocated(i, mPImplOrt->allocator).get()); } - for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { - mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); + for (size_t i = 0; i < (mPImplOrt->session)->GetOutputCount(); ++i) { + 
mOutputShapes.emplace_back((mPImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); } - inputNamesChar.resize(mInputNames.size(), nullptr); - std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar), + mInputNamesChar.resize(mInputNames.size(), nullptr); + std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(mInputNamesChar), [&](const std::string& str) { return str.c_str(); }); - outputNamesChar.resize(mOutputNames.size(), nullptr); - std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar), + mOutputNamesChar.resize(mOutputNames.size(), nullptr); + std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(mOutputNamesChar), [&](const std::string& str) { return str.c_str(); }); - inputShapesCopy = mInputShapes; - outputShapesCopy = mOutputShapes; - inputSizePerNode.resize(mInputShapes.size(), 1); - outputSizePerNode.resize(mOutputShapes.size(), 1); + mInputShapesCopy = mInputShapes; + mOutputShapesCopy = mOutputShapes; + mInputSizePerNode.resize(mInputShapes.size(), 1); + mOutputSizePerNode.resize(mOutputShapes.size(), 1); mInputsTotal = 1; for (size_t i = 0; i < mInputShapes.size(); ++i) { if (mInputShapes[i].size() > 0) { for (size_t j = 1; j < mInputShapes[i].size(); ++j) { if (mInputShapes[i][j] > 0) { mInputsTotal *= mInputShapes[i][j]; - inputSizePerNode[i] *= mInputShapes[i][j]; + mInputSizePerNode[i] *= mInputShapes[i][j]; } } } @@ -243,7 +243,7 @@ void OrtModel::setIO() for (size_t j = 1; j < mOutputShapes[i].size(); ++j) { if (mOutputShapes[i][j] > 0) { mOutputsTotal *= mOutputShapes[i][j]; - outputSizePerNode[i] *= mOutputShapes[i][j]; + mOutputSizePerNode[i] *= mOutputShapes[i][j]; } } } @@ -252,7 +252,7 @@ void OrtModel::setIO() void OrtModel::setEnv(Ort::Env* env) { - pImplOrt->env = std::shared_ptr(env); + mPImplOrt->env = std::shared_ptr(env); } // Inference @@ -266,12 +266,12 @@ std::vector 
OrtModel::inference(std::vector& input) } std::vector inputTensor; if constexpr (std::is_same_v) { - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); + inputTensor.emplace_back(Ort::Value::CreateTensor(mPImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); } else { - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size())); + inputTensor.emplace_back(Ort::Value::CreateTensor(mPImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size())); } // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); + auto outputTensors = (mPImplOrt->session)->Run(mPImplOrt->runOptions, mInputNamesChar.data(), inputTensor.data(), inputTensor.size(), mOutputNamesChar.data(), mOutputNamesChar.size()); O* outputValues = outputTensors[0].template GetTensorMutableData(); std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; outputTensors.clear(); @@ -292,22 +292,22 @@ void OrtModel::inference(I* input, int64_t input_size, O* output) std::vector inputShape{input_size, (int64_t)mInputShapes[0][1]}; Ort::Value inputTensor = Ort::Value(nullptr); if constexpr (std::is_same_v) { - inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input), input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); + inputTensor = Ort::Value::CreateTensor(mPImplOrt->memoryInfo, reinterpret_cast(input), input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); } else { - inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, input, input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); + 
inputTensor = Ort::Value::CreateTensor(mPImplOrt->memoryInfo, input, input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); } - (pImplOrt->ioBinding)->BindInput(mInputNames[0].c_str(), inputTensor); + (mPImplOrt->ioBinding)->BindInput(mInputNames[0].c_str(), inputTensor); std::vector outputShape{input_size, mOutputShapes[0][1]}; Ort::Value outputTensor = Ort::Value(nullptr); if constexpr (std::is_same_v) { - outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(output), input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); + outputTensor = Ort::Value::CreateTensor(mPImplOrt->memoryInfo, reinterpret_cast(output), input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); } else { - outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); + outputTensor = Ort::Value::CreateTensor(mPImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); } - (pImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); + (mPImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); - (pImplOrt->session)->Run(pImplOrt->runOptions, *pImplOrt->ioBinding); + (mPImplOrt->session)->Run(mPImplOrt->runOptions, *mPImplOrt->ioBinding); } template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, OrtDataType::Float16_t*); @@ -318,56 +318,56 @@ template void OrtModel::inference(float*, int64_t, float*); template void OrtModel::inference(I** input, int64_t input_size, O* output) { - std::vector inputTensors(inputShapesCopy.size()); + std::vector inputTensors(mInputShapesCopy.size()); - for (size_t i = 0; i < inputShapesCopy.size(); ++i) { + for (size_t i = 0; i < mInputShapesCopy.size(); ++i) { - inputShapesCopy[i][0] = input_size; // batch-size - outputShapesCopy[i][0] = input_size; // batch-size + mInputShapesCopy[i][0] = input_size; // batch-size + 
mOutputShapesCopy[i][0] = input_size; // batch-size if constexpr (std::is_same_v) { inputTensors[i] = Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, reinterpret_cast(input[i]), - inputSizePerNode[i] * input_size, - inputShapesCopy[i].data(), - inputShapesCopy[i].size()); + mInputSizePerNode[i] * input_size, + mInputShapesCopy[i].data(), + mInputShapesCopy[i].size()); } else { inputTensors[i] = Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, input[i], - inputSizePerNode[i] * input_size, - inputShapesCopy[i].data(), - inputShapesCopy[i].size()); + mInputSizePerNode[i] * input_size, + mInputShapesCopy[i].data(), + mInputShapesCopy[i].size()); } } Ort::Value outputTensor = Ort::Value(nullptr); if constexpr (std::is_same_v) { outputTensor = Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, reinterpret_cast(output), - outputSizePerNode[0] * input_size, // assumes that there is only one output node - outputShapesCopy[0].data(), - outputShapesCopy[0].size()); + mOutputSizePerNode[0] * input_size, // assumes that there is only one output node + mOutputShapesCopy[0].data(), + mOutputShapesCopy[0].size()); } else { outputTensor = Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, output, - outputSizePerNode[0] * input_size, // assumes that there is only one output node - outputShapesCopy[0].data(), - outputShapesCopy[0].size()); + mOutputSizePerNode[0] * input_size, // assumes that there is only one output node + mOutputShapesCopy[0].data(), + mOutputShapesCopy[0].size()); } // === Run inference === - pImplOrt->session->Run( - pImplOrt->runOptions, - inputNamesChar.data(), + mPImplOrt->session->Run( + mPImplOrt->runOptions, + mInputNamesChar.data(), inputTensors.data(), - inputNamesChar.size(), - outputNamesChar.data(), + mInputNamesChar.size(), + mOutputNamesChar.data(), &outputTensor, - outputNamesChar.size()); + mOutputNamesChar.size()); } template void 
OrtModel::inference(OrtDataType::Float16_t**, int64_t, OrtDataType::Float16_t*); @@ -382,37 +382,37 @@ std::vector OrtModel::inference(std::vector>& inputs) for (size_t i = 0; i < inputs.size(); ++i) { - inputShapesCopy[i][0] = inputs[i].size() / inputSizePerNode[i]; // batch-size + mInputShapesCopy[i][0] = inputs[i].size() / mInputSizePerNode[i]; // batch-size if constexpr (std::is_same_v) { input_tensors.emplace_back( Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, reinterpret_cast(inputs[i].data()), - inputSizePerNode[i] * inputShapesCopy[i][0], - inputShapesCopy[i].data(), - inputShapesCopy[i].size())); + mInputSizePerNode[i] * mInputShapesCopy[i][0], + mInputShapesCopy[i].data(), + mInputShapesCopy[i].size())); } else { input_tensors.emplace_back( Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, inputs[i].data(), - inputSizePerNode[i] * inputShapesCopy[i][0], - inputShapesCopy[i].data(), - inputShapesCopy[i].size())); + mInputSizePerNode[i] * mInputShapesCopy[i][0], + mInputShapesCopy[i].data(), + mInputShapesCopy[i].size())); } } - int32_t totalOutputSize = mOutputsTotal * inputShapesCopy[0][0]; + int32_t totalOutputSize = mOutputsTotal * mInputShapesCopy[0][0]; // === Run inference === - auto output_tensors = pImplOrt->session->Run( - pImplOrt->runOptions, - inputNamesChar.data(), + auto output_tensors = mPImplOrt->session->Run( + mPImplOrt->runOptions, + mInputNamesChar.data(), input_tensors.data(), input_tensors.size(), - outputNamesChar.data(), - outputNamesChar.size()); + mOutputNamesChar.data(), + mOutputNamesChar.size()); // === Extract output values === O* output_data = output_tensors[0].template GetTensorMutableData(); @@ -428,9 +428,9 @@ template std::vector OrtModel::inferencesession->EndProfiling(); + // mPImplOrt->session->EndProfiling(); // } - LOG(info) << "(ORT) Size of pImplOrt: " << sizeof(*pImplOrt) << " bytes"; + LOG(info) << "(ORT) Size of mPImplOrt: " << sizeof(*mPImplOrt) << " bytes"; 
} // private diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 37c12b2a3b3f4..630c2200e5900 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -645,41 +645,41 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) // bool recreateMemoryAllocator = false; mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) { nnApplications[lane].init(nn_settings); - if (nnApplications[lane].modelsUsed[0]) { - SetONNXGPUStream(*(nnApplications[lane].model_class).getSessionOptions(), lane, &deviceId); - (nnApplications[lane].model_class).setDeviceId(deviceId); - if (nnApplications[lane].model_class.getIntraOpNumThreads() > maxThreads) { - nnApplications[lane].model_class.setIntraOpNumThreads(maxThreads); + if (nnApplications[lane].mModelsUsed[0]) { + SetONNXGPUStream(*(nnApplications[lane].mModelClass).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].mModelClass).setDeviceId(deviceId); + if (nnApplications[lane].mModelClass.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].mModelClass.setIntraOpNumThreads(maxThreads); } - (nnApplications[lane].model_class).initEnvironment(); + (nnApplications[lane].mModelClass).initEnvironment(); // Registering this once seems to be enough, even with different environmnents / models. ONNX apparently uses this per device and stores the OrtAllocator internally. All models will then use the volatile allocation. // But environment must be valid, so we init the model environment first and use it here afterwards. // Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator. 
// TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); // recreateMemoryAllocator = true; - (nnApplications[lane].model_class).initSession(); + (nnApplications[lane].mModelClass).initSession(); } - if (nnApplications[lane].modelsUsed[1]) { - SetONNXGPUStream(*(nnApplications[lane].model_reg_1).getSessionOptions(), lane, &deviceId); - (nnApplications[lane].model_reg_1).setDeviceId(deviceId); - if (nnApplications[lane].model_reg_1.getIntraOpNumThreads() > maxThreads) { - nnApplications[lane].model_reg_1.setIntraOpNumThreads(maxThreads); + if (nnApplications[lane].mModelsUsed[1]) { + SetONNXGPUStream(*(nnApplications[lane].mModelReg1).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].mModelReg1).setDeviceId(deviceId); + if (nnApplications[lane].mModelReg1.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].mModelReg1.setIntraOpNumThreads(maxThreads); } - // (nnApplications[lane].model_reg_1).setEnv((nnApplications[lane].model_class).getEnv()); - (nnApplications[lane].model_reg_1).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_reg_1).getEnv(), (nnApplications[lane].model_reg_1).getMemoryInfo(), mRec, recreateMemoryAllocator); - (nnApplications[lane].model_reg_1).initSession(); + // (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv()); + (nnApplications[lane].mModelReg1).initEnvironment(); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator); + 
(nnApplications[lane].mModelReg1).initSession(); } - if (nnApplications[lane].modelsUsed[2]) { - SetONNXGPUStream(*(nnApplications[lane].model_reg_2).getSessionOptions(), lane, &deviceId); - (nnApplications[lane].model_reg_2).setDeviceId(deviceId); - if (nnApplications[lane].model_reg_2.getIntraOpNumThreads() > maxThreads) { - nnApplications[lane].model_reg_2.setIntraOpNumThreads(maxThreads); + if (nnApplications[lane].mModelsUsed[2]) { + SetONNXGPUStream(*(nnApplications[lane].mModelReg2).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].mModelReg2).setDeviceId(deviceId); + if (nnApplications[lane].mModelReg2.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].mModelReg2.setIntraOpNumThreads(maxThreads); } - (nnApplications[lane].model_reg_2).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator); - (nnApplications[lane].model_reg_2).initSession(); + (nnApplications[lane].mModelReg2).initEnvironment(); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); + (nnApplications[lane].mModelReg2).initSession(); } if (nn_settings.nnClusterizerVerbosity < 3) { LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId; @@ -689,14 +689,14 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector]; GPUTPCNNClusterizer& clustererNNShadow = doGPU ? 
processorsShadow()->tpcNNClusterer[sector] : clustererNN; int32_t lane = sector % numLanes; - clustererNN.deviceId = deviceId; + clustererNN.mDeviceId = deviceId; clustererNN.mISector = sector; - clustererNN.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; + clustererNN.mNnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; nnApplications[lane].initClusterizer(nn_settings, clustererNN); if (doGPU) { - clustererNNShadow.deviceId = deviceId; + clustererNNShadow.mDeviceId = deviceId; clustererNNShadow.mISector = sector; - clustererNNShadow.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; + clustererNNShadow.mNnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow); } AllocateRegisteredMemory(clustererNN.mMemoryId); @@ -975,62 +975,62 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) int withMC = (doGPU && propagateMCLabels); - if (clustererNNShadow.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { + if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } // float time_clusterizer = 0, time_fill = 0, time_networks = 0; - for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.nnClusterizerBatchedMode); batch++) { - uint batchStart = batch * clustererNNShadow.nnClusterizerBatchedMode; - size_t iSize = CAMath::Min((uint)clustererNNShadow.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); + for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / 
clustererNNShadow.mNnClusterizerBatchedMode); batch++) { + uint batchStart = batch * clustererNNShadow.mNnClusterizerBatchedMode; + size_t iSize = CAMath::Min((uint)clustererNNShadow.mNnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); // auto start0 = std::chrono::high_resolution_clock::now(); - runKernel({GetGrid(iSize * clustererNNShadow.nnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data + runKernel({GetGrid(iSize * clustererNNShadow.mNnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Filling the data // auto stop0 = std::chrono::high_resolution_clock::now(); // auto start1 = std::chrono::high_resolution_clock::now(); // NN evaluations - if (clustererNNShadow.nnInferenceInputDType == 0) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_32); + if (clustererNNShadow.mNnInferenceInputDType == 0) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32); } - } else if (clustererNNShadow.nnInferenceInputDType == 1) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - 
(nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_32); + } else if (clustererNNShadow.mNnInferenceInputDType == 1) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32); } } - if (!clustererNNShadow.nnClusterizerUseCfRegression) { - if (clustererNNShadow.nnInferenceInputDType == 0) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg1_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg1_32); + if (!clustererNNShadow.mNnClusterizerUseCfRegression) { + if (clustererNNShadow.mNnInferenceInputDType == 0) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_32); } - } else if (clustererNNShadow.nnInferenceInputDType == 1) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg1_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg1_32); + } else if (clustererNNShadow.mNnInferenceInputDType == 1) { + if 
(clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32); } } - if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) { - if (clustererNNShadow.nnInferenceInputDType == 0) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg2_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg2_32); + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { + if (clustererNNShadow.mNnInferenceInputDType == 0) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_32); } - } else if (clustererNNShadow.nnInferenceInputDType == 1) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg2_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg2_32); + } else if (clustererNNShadow.mNnInferenceInputDType == 1) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, 
clustererNNShadow.mOutputDataReg2_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32); } } } @@ -1039,24 +1039,24 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) // auto stopNNs = std::chrono::high_resolution_clock::now(); // Publishing kernels - if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Assigning class labels } else { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Assigning class labels } - if (!clustererNNShadow.nnClusterizerUseCfRegression) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Publishing class 1 regression results - if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Publishing class 2 regression results + if (!clustererNNShadow.mNnClusterizerUseCfRegression) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Publishing class 1 regression results + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { + 
runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Publishing class 2 regression results } } // for(int i = 0; i < iSize; ++i) { - // if(clustererNNShadow.outputDataClass[i + batchStart] > 1) { - // LOG(info) << "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.modelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.outputDataClass[i + batchStart] << " thresh " << clustererNNShadow.nnClassThreshold << " instead of 0 or 1. Please check the model and the input data."; + // if(clustererNNShadow.mOutputDataClass[i + batchStart] > 1) { + // LOG(info) << "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.mModelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.mOutputDataClass[i + batchStart] << " thresh " << clustererNNShadow.mNnClassThreshold << " instead of 0 or 1. 
Please check the model and the input data."; // // std::string input = "["; - // // for(int j = 0; j < clustererNNShadow.nnClusterizerElementSize; j++){ - // // input += std::to_string(clustererNNShadow.inputData_16[i * clustererNNShadow.nnClusterizerElementSize + j].ToFloat()) + ", "; + // // for(int j = 0; j < clustererNNShadow.mNnClusterizerElementSize; j++){ + // // input += std::to_string(clustererNNShadow.mInputData_16[i * clustererNNShadow.mNnClusterizerElementSize + j].ToFloat()) + ", "; // // } // // input += "]"; // // LOG(info) << "Input is: " << input; @@ -1069,19 +1069,19 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; // time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; } - if (clustererNNShadow.nnClusterizerUseCfRegression) { + if (clustererNNShadow.mNnClusterizerUseCfRegression) { // auto start1 = std::chrono::high_resolution_clock::now(); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 // auto stop1 = std::chrono::high_resolution_clock::now(); // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; } - // if (clustererNNShadow.nnClusterizerVerbosity < 3) { + // if (clustererNNShadow.mNnClusterizerVerbosity < 3) { // int acceptedClusters = 0; // for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { - // if(clustererNNShadow.outputDataClass[i] > 1 || clustererNNShadow.outputDataClass[i] < 0) { - // LOG(info) << "WARNING ORT 2: " << clustererNNShadow.outputDataClass[i] << " for index " << i << " / " 
<< clusterer.mPmemory->counters.nClusters; + // if(clustererNNShadow.mOutputDataClass[i] > 1 || clustererNNShadow.mOutputDataClass[i] < 0) { + // LOG(info) << "WARNING ORT 2: " << clustererNNShadow.mOutputDataClass[i] << " for index " << i << " / " << clusterer.mPmemory->counters.nClusters; // } - // acceptedClusters += clustererNNShadow.outputDataClass[i]; + // acceptedClusters += clustererNNShadow.mOutputDataClass[i]; // } // LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; networks: " << time_networks << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << (int32_t)clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; // } @@ -1187,9 +1187,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { // if (GetProcessingSettings().nn.applyNNclusterizer) { // GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; - // nnApplication.model_class.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.model_reg_1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.model_reg_2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.mModelClass.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.mModelReg1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.mModelReg2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); // } if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx index 092af2ea393c5..da37c0771fe84 100644 --- 
a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx @@ -25,69 +25,69 @@ void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {} void* GPUTPCNNClusterizer::setIOPointers(void* mem) { - if (nnClusterizerBatchedMode > 0) { - if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) { - computePointerWithAlignment(mem, inputData_16, nnClusterizerBatchedMode * nnClusterizerElementSize); - } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) { - computePointerWithAlignment(mem, inputData_32, nnClusterizerBatchedMode * nnClusterizerElementSize); + if (mNnClusterizerBatchedMode > 0) { + if (mNnInferenceInputDType == 0 && mNnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, mInputData_16, mNnClusterizerBatchedMode * mNnClusterizerElementSize); + } else if (mNnInferenceInputDType == 1 && mNnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, mInputData_32, mNnClusterizerBatchedMode * mNnClusterizerElementSize); } - computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode); + computePointerWithAlignment(mem, mClusterFlags, 2 * mNnClusterizerBatchedMode); - if (nnInferenceOutputDType == 0 && nnClusterizerElementSize > 0) { - if (nnClusterizerModelClassNumOutputNodes > 0) { - computePointerWithAlignment(mem, modelProbabilities_16, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + if (mNnInferenceOutputDType == 0 && mNnClusterizerElementSize > 0) { + if (mNnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, mModelProbabilities_16, mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes); } - if (!nnClusterizerUseCfRegression) { - if (nnClusterizerModelReg1NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg1_16, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + if (!mNnClusterizerUseCfRegression) { + if 
(mNnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, mOutputDataReg1_16, mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes); } - if (nnClusterizerModelReg2NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg2_16, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + if (mNnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, mOutputDataReg2_16, mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes); } } - } else if (nnInferenceOutputDType == 1 && nnClusterizerElementSize > 0) { - if (nnClusterizerModelClassNumOutputNodes > 0) { - computePointerWithAlignment(mem, modelProbabilities_32, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + } else if (mNnInferenceOutputDType == 1 && mNnClusterizerElementSize > 0) { + if (mNnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, mModelProbabilities_32, mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes); } - if (!nnClusterizerUseCfRegression) { - if (nnClusterizerModelReg1NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg1_32, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + if (!mNnClusterizerUseCfRegression) { + if (mNnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, mOutputDataReg1_32, mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes); } - if (nnClusterizerModelReg2NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg2_32, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + if (mNnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, mOutputDataReg2_32, mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes); } } } } - if (nnClusterizerTotalClusters > 0) { - computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters); + if (mNnClusterizerTotalClusters > 0) { + 
computePointerWithAlignment(mem, mOutputDataClass, mNnClusterizerTotalClusters); } return mem; } // std::vector GPUTPCNNClusterizer::pointerSizes() { // std::vector sizes(7, -1); -// if (nnClusterizerBatchedMode > 0) { -// if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) { -// sizes[0] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData16 -// } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) { -// sizes[1] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData32 +// if (mNnClusterizerBatchedMode > 0) { +// if (mNnInferenceInputDType == 0 && mNnClusterizerElementSize > 0) { +// sizes[0] = mNnClusterizerBatchedMode * mNnClusterizerElementSize; // inputData16 +// } else if (mNnInferenceInputDType == 1 && mNnClusterizerElementSize > 0) { +// sizes[1] = mNnClusterizerBatchedMode * mNnClusterizerElementSize; // inputData32 // } -// sizes[2] = 2 * nnClusterizerBatchedMode; // clusterFlags -// if (nnClusterizerModelClassNumOutputNodes > 0) { -// sizes[3] = nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes; // modelProbabilities +// sizes[2] = 2 * mNnClusterizerBatchedMode; // mClusterFlags +// if (mNnClusterizerModelClassNumOutputNodes > 0) { +// sizes[3] = mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes; // modelProbabilities // } -// if (!nnClusterizerUseCfRegression) { -// if (nnClusterizerModelReg1NumOutputNodes > 0) { -// sizes[4] = nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes; // outputDataReg1 +// if (!mNnClusterizerUseCfRegression) { +// if (mNnClusterizerModelReg1NumOutputNodes > 0) { +// sizes[4] = mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes; // outputDataReg1 // } -// if (nnClusterizerModelReg2NumOutputNodes > 0) { -// sizes[5] = nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes; // outputDataReg2 +// if (mNnClusterizerModelReg2NumOutputNodes > 0) { +// sizes[5] = mNnClusterizerBatchedMode * 
mNnClusterizerModelReg2NumOutputNodes; // outputDataReg2 // } // } // } -// if (nnClusterizerTotalClusters > 0) { -// sizes[6] = nnClusterizerTotalClusters; // outputDataClass +// if (mNnClusterizerTotalClusters > 0) { +// sizes[6] = mNnClusterizerTotalClusters; // mOutputDataClass // } // return sizes; // } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h index 022642f9f142e..f7c2d13407b0e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h @@ -37,42 +37,42 @@ class GPUTPCNNClusterizer : public GPUProcessor // Neural network clusterization - int nnClusterizerSizeInputRow = 3; - int nnClusterizerSizeInputPad = 3; - int nnClusterizerSizeInputTime = 3; - int nnClusterizerElementSize = -1; - bool nnClusterizerAddIndexData = true; - float nnClassThreshold = 0.01; - bool nnSigmoidTrafoClassThreshold = 1; - int nnClusterizerUseCfRegression = 0; - int nnClusterizerBatchedMode = 1; - int nnClusterizerTotalClusters = 1; - int nnClusterizerVerbosity = 0; - int nnClusterizerBoundaryFillValue = -1; - int nnClusterizerModelClassNumOutputNodes = -1; - int nnClusterizerModelReg1NumOutputNodes = -1; - int nnClusterizerModelReg2NumOutputNodes = -1; - int nnInferenceInputDType = 0; // 0: float16, 1: float32 - int nnInferenceOutputDType = 0; // 0: float16, 1: float32 + int mNnClusterizerSizeInputRow = 3; + int mNnClusterizerSizeInputPad = 3; + int mNnClusterizerSizeInputTime = 3; + int mNnClusterizerElementSize = -1; + bool mNnClusterizerAddIndexData = true; + float mNnClassThreshold = 0.01; + bool mNnSigmoidTrafoClassThreshold = 1; + int mNnClusterizerUseCfRegression = 0; + int mNnClusterizerBatchedMode = 1; + int mNnClusterizerTotalClusters = 1; + int mNnClusterizerVerbosity = 0; + int mNnClusterizerBoundaryFillValue = -1; + int mNnClusterizerModelClassNumOutputNodes = -1; + int mNnClusterizerModelReg1NumOutputNodes = -1; + int 
mNnClusterizerModelReg2NumOutputNodes = -1; + int mNnInferenceInputDType = 0; // 0: float16, 1: float32 + int mNnInferenceOutputDType = 0; // 0: float16, 1: float32 int mISector = -1; - int deviceId = -1; + int mDeviceId = -1; // Memory allocation for neural network - bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptr - int* outputDataClass = nullptr; + bool* mClusterFlags = nullptr; // mSplitInTime, mSplitInPad. Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptr + int* mOutputDataClass = nullptr; // FP32 - float* inputData_32 = nullptr; - float* modelProbabilities_32 = nullptr; - float* outputDataReg1_32 = nullptr; - float* outputDataReg2_32 = nullptr; + float* mInputData_32 = nullptr; + float* mModelProbabilities_32 = nullptr; + float* mOutputDataReg1_32 = nullptr; + float* mOutputDataReg2_32 = nullptr; // FP16 - OrtDataType::Float16_t* inputData_16 = nullptr; - OrtDataType::Float16_t* modelProbabilities_16 = nullptr; - OrtDataType::Float16_t* outputDataReg1_16 = nullptr; - OrtDataType::Float16_t* outputDataReg2_16 = nullptr; + OrtDataType::Float16_t* mInputData_16 = nullptr; + OrtDataType::Float16_t* mModelProbabilities_16 = nullptr; + OrtDataType::Float16_t* mOutputDataReg1_16 = nullptr; + OrtDataType::Float16_t* mOutputDataReg2_16 = nullptr; int16_t mMemoryId = -1; }; // class GPUTPCNNClusterizer diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index 31b71fd8f1ebe..ca2deec60601c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -45,7 +45,7 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set } } - OrtOptions = { + mOrtOptions = { {"model-path", class_model_path}, {"device-type", settings.nnInferenceDevice}, {"allocate-device-memory", 
std::to_string(settings.nnInferenceAllocateDevMem)}, @@ -57,60 +57,60 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set {"logging-level", std::to_string(settings.nnInferenceVerbosity)}, {"onnx-environment-name", "c1"}}; - model_class.initOptions(OrtOptions); - modelsUsed[0] = true; + mModelClass.initOptions(mOrtOptions); + mModelsUsed[0] = true; reg_model_paths_local = o2::utils::Str::tokenize(reg_model_path, ':'); if (!settings.nnClusterizerUseCfRegression) { if (reg_model_paths_local.size() == 1) { - OrtOptions["model-path"] = reg_model_paths_local[0]; - OrtOptions["onnx-environment-name"] = "r1"; - model_reg_1.initOptions(OrtOptions); - modelsUsed[1] = true; + mOrtOptions["model-path"] = reg_model_paths_local[0]; + mOrtOptions["onnx-environment-name"] = "r1"; + mModelReg1.initOptions(mOrtOptions); + mModelsUsed[1] = true; } else { - OrtOptions["model-path"] = reg_model_paths_local[0]; - OrtOptions["onnx-environment-name"] = "r1"; - model_reg_1.initOptions(OrtOptions); - modelsUsed[1] = true; - OrtOptions["model-path"] = reg_model_paths_local[1]; - OrtOptions["onnx-environment-name"] = "r2"; - model_reg_2.initOptions(OrtOptions); - modelsUsed[2] = true; + mOrtOptions["model-path"] = reg_model_paths_local[0]; + mOrtOptions["onnx-environment-name"] = "r1"; + mModelReg1.initOptions(mOrtOptions); + mModelsUsed[1] = true; + mOrtOptions["model-path"] = reg_model_paths_local[1]; + mOrtOptions["onnx-environment-name"] = "r2"; + mModelReg2.initOptions(mOrtOptions); + mModelsUsed[2] = true; } } } void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN) { - clustererNN.nnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression; - clustererNN.nnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow; - clustererNN.nnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad; - clustererNN.nnClusterizerSizeInputTime = 
settings.nnClusterizerSizeInputTime; - clustererNN.nnClusterizerAddIndexData = settings.nnClusterizerAddIndexData; - clustererNN.nnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 3 : 0); - clustererNN.nnClusterizerBatchedMode = settings.nnClusterizerBatchedMode; - clustererNN.nnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue; - clustererNN.nnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold; - if (clustererNN.nnSigmoidTrafoClassThreshold) { - clustererNN.nnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold)); + clustererNN.mNnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression; + clustererNN.mNnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow; + clustererNN.mNnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad; + clustererNN.mNnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime; + clustererNN.mNnClusterizerAddIndexData = settings.nnClusterizerAddIndexData; + clustererNN.mNnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 
3 : 0); + clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode; + clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue; + clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold; + if (clustererNN.mNnSigmoidTrafoClassThreshold) { + clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold)); } else { - clustererNN.nnClassThreshold = settings.nnClassThreshold; + clustererNN.mNnClassThreshold = settings.nnClassThreshold; } if (settings.nnClusterizerVerbosity < 0) { - clustererNN.nnClusterizerVerbosity = settings.nnInferenceVerbosity; + clustererNN.mNnClusterizerVerbosity = settings.nnInferenceVerbosity; } else { - clustererNN.nnClusterizerVerbosity = settings.nnClusterizerVerbosity; + clustererNN.mNnClusterizerVerbosity = settings.nnClusterizerVerbosity; } - clustererNN.nnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos; - clustererNN.nnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos; - clustererNN.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1]; + clustererNN.mNnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos; + clustererNN.mNnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos; + clustererNN.mNnClusterizerModelClassNumOutputNodes = mModelClass.getNumOutputNodes()[0][1]; if (!settings.nnClusterizerUseCfRegression) { - if (model_class.getNumOutputNodes()[0][1] == 1 || !model_reg_2.isInitialized()) { - clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + if (mModelClass.getNumOutputNodes()[0][1] == 1 || !mModelReg2.isInitialized()) { + clustererNN.mNnClusterizerModelReg1NumOutputNodes = mModelReg1.getNumOutputNodes()[0][1]; } else { - clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; - 
clustererNN.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1]; + clustererNN.mNnClusterizerModelReg1NumOutputNodes = mModelReg1.getNumOutputNodes()[0][1]; + clustererNN.mNnClusterizerModelReg2NumOutputNodes = mModelReg2.getNumOutputNodes()[0][1]; } } } @@ -199,20 +199,20 @@ void MockedOrtAllocator::LeakCheck() void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) { - mockedAlloc = std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); + mMockedAlloc = std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); if (recreate) { Ort::ThrowOnError(Ort::GetApi().UnregisterAllocator((OrtEnv*)(*env), (OrtMemoryInfo*)(*memInfo))); } - Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc.get())); - memInfo = (Ort::MemoryInfo*)mockedAlloc->Info(); + Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mMockedAlloc.get())); + memInfo = (Ort::MemoryInfo*)mMockedAlloc->Info(); } const OrtMemoryInfo* GPUTPCNNClusterizerHost::getMockedMemoryInfo() { - return mockedAlloc->Info(); + return mMockedAlloc->Info(); } MockedOrtAllocator* GPUTPCNNClusterizerHost::getMockedAllocator() { - return mockedAlloc.get(); + return mMockedAlloc.get(); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h index 0379b83d0ae02..e659753f21d7d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -57,13 +57,11 @@ class GPUTPCNNClusterizerHost MockedOrtAllocator* getMockedAllocator(); const OrtMemoryInfo* getMockedMemoryInfo(); - std::unordered_map OrtOptions; - o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters - std::vector modelsUsed = {false, false, false}; // 0: class, 1: reg_1, 2: reg_2 - int32_t deviceId = -1; - std::vector reg_model_paths; - - std::shared_ptr mockedAlloc = 
nullptr; + std::unordered_map mOrtOptions; + o2::ml::OrtModel mModelClass, mModelReg1, mModelReg2; // For splitting clusters + std::vector mModelsUsed = {false, false, false}; // 0: class, 1: reg_1, 2: reg_2 + int32_t mDeviceId = -1; + std::shared_ptr mMockedAlloc = nullptr; }; // class GPUTPCNNClusterizerHost } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 413293502d3c6..47bc5e8da80ca 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -40,7 +40,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); @@ -56,56 +56,56 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); + int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); #ifndef GPUCA_GPUCODE GPUCA_UNROLL(U(), U()); #endif - for (int r = -clustererNN.nnClusterizerSizeInputRow; r <= clustererNN.nnClusterizerSizeInputRow; r++) { + for (int r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) { bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); int pad_offset = is_row_boundary ? 
0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); - for (int p = -clustererNN.nnClusterizerSizeInputPad + pad_offset; p <= clustererNN.nnClusterizerSizeInputPad + pad_offset; p++) { - bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow); - for (int t = -clustererNN.nnClusterizerSizeInputTime; t <= clustererNN.nnClusterizerSizeInputTime; t++) { + for (int p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) { + bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); + for (int t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) { if (!is_boundary) { CfChargePos tmp_pos(row + r, pad + p, time + t); - if (r == 0 && !clustererNN.clusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization - clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); - clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; + if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization + clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx]; } if (dtype == 0) { - clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); } else if (dtype == 1) { - clustererNN.inputData_32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + 
clustererNN.mInputData_32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; } } else { // Filling boundary just to make sure that no values are left unintentionally if (dtype == 0) { - clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.mNnClusterizerBoundaryFillValue)); } else { - clustererNN.inputData_32[write_idx] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + clustererNN.mInputData_32[write_idx] = static_cast(clustererNN.mNnClusterizerBoundaryFillValue); } } write_idx++; } } } - if (clustererNN.nnClusterizerAddIndexData) { + if (clustererNN.mNnClusterizerAddIndexData) { if (dtype == 0) { - clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f); - clustererNN.inputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); - clustererNN.inputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); + clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f); + clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); + clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); } else { - clustererNN.inputData_32[write_idx] = sector / 36.f; - clustererNN.inputData_32[write_idx + 1] = row / 152.f; - clustererNN.inputData_32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); + clustererNN.mInputData_32[write_idx] = sector / 36.f; + clustererNN.mInputData_32[write_idx + 1] = row / 152.f; + clustererNN.mInputData_32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); } } } @@ -116,62 +116,62 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); CfChargePos peak = 
clusterer.mPfilteredPeakPositions[base_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()); - if (clustererNN.nnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.nnClusterizerElementSize - 1)) { - uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize; + if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) { + uint top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; CfChargePos tmp_pos = peak.delta(d); - clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); - clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; + clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx]; } if (dtype == 0) { - clustererNN.inputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f); - clustererNN.inputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f); - clustererNN.inputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); + clustererNN.mInputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f); + clustererNN.mInputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f); + clustererNN.mInputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); } else { - clustererNN.inputData_32[top_idx - 3] = sector / 36.f; - clustererNN.inputData_32[top_idx - 2] = row / 152.f; - clustererNN.inputData_32[top_idx - 1] = static_cast(pad) / GPUTPCGeometry::NPads(row); + clustererNN.mInputData_32[top_idx - 3] = sector / 36.f; + clustererNN.mInputData_32[top_idx - 2] = row / 152.f; + clustererNN.mInputData_32[top_idx - 1] = static_cast(pad) / GPUTPCGeometry::NPads(row); } - } else if ((int32_t)transient_index < 
(clustererNN.nnClusterizerElementSize - 3)) { + } else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) { int time = static_cast(peak.time()); - int r = CAMath::Floor(transient_index / ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1))) - clustererNN.nnClusterizerSizeInputRow; + int r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); if (is_row_boundary) { if (dtype == 0) { - clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.mNnClusterizerBoundaryFillValue)); } else { - clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast(clustererNN.mNnClusterizerBoundaryFillValue); } } else { - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); + int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); int pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r); - int rest_1 = transient_index % ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1)); - int p = CAMath::Floor(rest_1 / (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - clustererNN.nnClusterizerSizeInputPad + pad_offset; - bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + 
row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow); + int rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); + int p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset; + bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); if (!is_boundary) { float central_charge = static_cast(chargeMap[peak].unpack()); - int t = (rest_1 % (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - clustererNN.nnClusterizerSizeInputTime; + int t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; CfChargePos tmp_pos(row + r, pad + p, time + t); if (dtype == 0) { - clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); } else if (dtype == 1) { - clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; } } else { if (dtype == 0) { - clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.mNnClusterizerBoundaryFillValue)); } else { - clustererNN.inputData_32[base_idx * 
clustererNN.nnClusterizerElementSize + transient_index] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast(clustererNN.mNnClusterizerBoundaryFillValue); } } } @@ -183,9 +183,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread processors.tpcNNClusterer[sector].nnClassThreshold); + processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)((processors.tpcNNClusterer[sector].mModelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].mNnClassThreshold); } else if (dtype == 1) { - processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities_32[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold); + processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].mModelProbabilities_32[glo_idx] > processors.tpcNNClusterer[sector].mNnClassThreshold); } } @@ -194,29 +194,29 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(clustererNN.modelProbabilities_16[pIdx]); + current_max_prob = static_cast(clustererNN.mModelProbabilities_16[pIdx]); } else if (dtype == 1) { - current_max_prob = clustererNN.modelProbabilities_32[pIdx]; + current_max_prob = clustererNN.mModelProbabilities_32[pIdx]; } } else { if (dtype == 0) { - current_max_prob = CAMath::Max(current_max_prob, clustererNN.modelProbabilities_16[pIdx].ToFloat()); + current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat()); } else if (dtype == 1) { - current_max_prob = CAMath::Max(current_max_prob, clustererNN.modelProbabilities_32[pIdx]); + current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]); } } } - // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clustererNN.nnClusterizerModelClassNumOutputNodes)); 
// Multiple outputs of the class network are the probabilities for each class. The highest one "wins" - clustererNN.outputDataClass[glo_idx + batchStart] = class_label; + // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins" + clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label; if (class_label > 1) { - clustererNN.clusterFlags[2 * glo_idx] = 1; - clustererNN.clusterFlags[2 * glo_idx + 1] = 1; + clustererNN.mClusterFlags[2 * glo_idx] = 1; + clustererNN.mClusterFlags[2 * glo_idx + 1] = 1; } } @@ -235,11 +235,11 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread= 1)) { + if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes == -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) { ClusterAccumulator pc; @@ -265,21 +265,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(peak.pad()) + clustererNN.outputDataReg1_16[model_output_index].ToFloat(), - clustererNN.outputDataReg1_16[model_output_index + 2].ToFloat(), - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg1_16[model_output_index + 1].ToFloat(), - clustererNN.outputDataReg1_16[model_output_index + 3].ToFloat(), - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(), + static_cast(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(), + clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(), + clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat(), + clustererNN.mClusterFlags[2 * glo_idx], + 
clustererNN.mClusterFlags[2 * glo_idx + 1]); } else if (dtype == 1) { - pc.setFull(central_charge * clustererNN.outputDataReg1_32[model_output_index + 4], - static_cast(peak.pad()) + clustererNN.outputDataReg1_32[model_output_index], - clustererNN.outputDataReg1_32[model_output_index + 2], - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg1_32[model_output_index + 1], - clustererNN.outputDataReg1_32[model_output_index + 3], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4], + static_cast(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index], + clustererNN.mOutputDataReg1_32[model_output_index + 2], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1], + clustererNN.mOutputDataReg1_32[model_output_index + 3], + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } tpc::ClusterNative myCluster; @@ -330,9 +330,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread 0) { + if (clustererNN.mOutputDataClass[full_glo_idx] > 0) { ClusterAccumulator pc; @@ -358,21 +358,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(peak.pad()) + clustererNN.outputDataReg2_16[model_output_index].ToFloat(), - clustererNN.outputDataReg2_16[model_output_index + 4].ToFloat(), - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_16[model_output_index + 2].ToFloat(), - clustererNN.outputDataReg2_16[model_output_index + 6].ToFloat(), - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(), + static_cast(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(), + clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(), + 
(clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(), + clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(), + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } else if (dtype == 1) { - pc.setFull(central_charge * clustererNN.outputDataReg2_32[model_output_index + 8], - static_cast(peak.pad()) + clustererNN.outputDataReg2_32[model_output_index], - clustererNN.outputDataReg2_32[model_output_index + 4], - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_32[model_output_index + 2], - clustererNN.outputDataReg2_32[model_output_index + 6], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8], + static_cast(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index], + clustererNN.mOutputDataReg2_32[model_output_index + 4], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2], + clustererNN.mOutputDataReg2_32[model_output_index + 6], + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } tpc::ClusterNative myCluster; @@ -403,21 +403,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(peak.pad()) + clustererNN.outputDataReg2_16[model_output_index + 1].ToFloat(), - clustererNN.outputDataReg2_16[model_output_index + 5].ToFloat(), - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_16[model_output_index + 3].ToFloat(), - clustererNN.outputDataReg2_16[model_output_index + 7].ToFloat(), - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(), + static_cast(peak.pad()) + 
clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(), + clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(), + clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(), + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } else if (dtype == 1) { - pc.setFull(central_charge * clustererNN.outputDataReg2_32[model_output_index + 9], - static_cast(peak.pad()) + clustererNN.outputDataReg2_32[model_output_index + 1], - clustererNN.outputDataReg2_32[model_output_index + 5], - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_32[model_output_index + 3], - clustererNN.outputDataReg2_32[model_output_index + 7], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9], + static_cast(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1], + clustererNN.mOutputDataReg2_32[model_output_index + 5], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3], + clustererNN.mOutputDataReg2_32[model_output_index + 7], + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap); From 67b81698f9c95edff48e630623b063a3c6fa9b51 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 10:10:26 +0200 Subject: [PATCH 0294/1764] GPU TPC: Make cluster rejection based on chi2 from current track position optional if in rejection based on interpolation mode --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 2d8c2184e3b02..4c32c3e46e3a7 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -151,6 +151,7 @@ AddOptionRTC(forceEarlyTransform, int8_t, -1, "", 0, "Force early TPC transforma AddOptionRTC(dropLoopers, uint8_t, 0, "", 0, "Drop looping tracks starting from second loop") AddOptionRTC(mergerCovSource, uint8_t, 2, "", 0, "Method to obtain covariance in track merger: 0 = simple filterErrors method, 1 = use cov from track following, 2 = refit (default)") AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation instead of extrapolation for chi2 based cluster rejection") +AddOptionRTC(mergerInterpolateRejectAlsoOnCurrentPosition, uint8_t, 1, "", 0, "When using mergerInterpolateErrors, reject based on chi2 twice computed with interpolated and current track position") AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode") AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel") AddOptionRTC(looperInterpolationInExtraPass, int8_t, -1, "", 0, "Perform looper interpolation in an extra pass") diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 9e23f9af3cf43..90612fc98f836 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -691,7 +691,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, return 0; } - return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || rejectChi2 == rejectInterReject, err2Y, err2Z, &param); + return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || (param.rec.tpc.mergerInterpolateRejectAlsoOnCurrentPosition && rejectChi2 == rejectInterReject), err2Y, 
err2Z, &param); } GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict() param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z) From 8060987d94bb48a2b71c98066a8952630246723d Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 24 Apr 2025 12:06:38 +0200 Subject: [PATCH 0295/1764] Fix: TrackLTIntegral.addStep needs (q/p)^2 instead of 1/p^2 Since the charge dependence of beta is accounted internally. --- .../TrackLTIntegral.h | 2 +- .../TrackParametrization.h | 13 +++++++++++++ .../Reconstruction/src/TrackLTIntegral.cxx | 6 +++--- .../test/testLTOFIntegration.cxx | 4 ++-- Detectors/Base/src/Propagator.cxx | 18 +++++++++--------- Detectors/GlobalTracking/src/MatchTPCITS.cxx | 4 ++-- .../TRD/workflow/src/TRDGlobalTrackingSpec.cxx | 4 ++-- 7 files changed, 32 insertions(+), 19 deletions(-) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackLTIntegral.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackLTIntegral.h index 6cf9ceda8e195..e799804805972 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackLTIntegral.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackLTIntegral.h @@ -63,7 +63,7 @@ class TrackLTIntegral } } - GPUd() void addStep(float dL, float p2Inv); + GPUd() void addStep(float dL, float q2p2); GPUd() void addX2X0(float d) { mX2X0 += d; } GPUd() void addXRho(float d) { mXRho += d; } diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index a51ec3b7010a7..bfd56eb8f024f 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -191,6 +191,7 @@ class TrackParametrization GPUd() value_t getPhi() 
const; GPUd() value_t getPhiPos() const; + GPUd() value_t getQ2P2() const; GPUd() value_t getPtInv() const; GPUd() value_t getP2Inv() const; GPUd() value_t getP2() const; @@ -555,6 +556,18 @@ GPUdi() auto TrackParametrization::getPhiPos() const -> value_t return phi; } +//____________________________________________________________ +template +GPUdi() auto TrackParametrization::getQ2P2() const -> value_t +{ + // return the (q/p)^2 + value_t q2pt2 = mP[kQ2Pt] * mP[kQ2Pt]; + if (q2pt2 < MinPTInv * MinPTInv) { + q2pt2 = MinPTInv * MinPTInv; + } + return q2pt2 / (1.f + getTgl() * getTgl()); +} + //____________________________________________________________ template GPUdi() auto TrackParametrization::getPtInv() const -> value_t diff --git a/DataFormats/Reconstruction/src/TrackLTIntegral.cxx b/DataFormats/Reconstruction/src/TrackLTIntegral.cxx index 3efddff00f512..426c3da04726c 100644 --- a/DataFormats/Reconstruction/src/TrackLTIntegral.cxx +++ b/DataFormats/Reconstruction/src/TrackLTIntegral.cxx @@ -39,9 +39,9 @@ GPUd() void TrackLTIntegral::print() const } //_____________________________________________________ -GPUd() void TrackLTIntegral::addStep(float dL, float p2Inv) +GPUd() void TrackLTIntegral::addStep(float dL, float q2p2) { - ///< add step in cm to integrals + ///< add step in cm to integrals, q2p2 is (q/p)^2. 
mL += dL; if (isTimeNotNeeded()) { return; @@ -49,7 +49,7 @@ GPUd() void TrackLTIntegral::addStep(float dL, float p2Inv) const float dTns = dL * 1000.f / o2::constants::physics::LightSpeedCm2NS; // time change in ps for beta = 1 particle for (int id = 0; id < getNTOFs(); id++) { const float m2z = track::PID::getMass2Z(id); - const float betaInv = math_utils::sqrt(1.f + m2z * m2z * p2Inv); + const float betaInv = math_utils::sqrt(1.f + m2z * m2z * q2p2); mT[id] += dTns * betaInv; } } diff --git a/DataFormats/Reconstruction/test/testLTOFIntegration.cxx b/DataFormats/Reconstruction/test/testLTOFIntegration.cxx index bb65c60d08d18..f737b1df53666 100644 --- a/DataFormats/Reconstruction/test/testLTOFIntegration.cxx +++ b/DataFormats/Reconstruction/test/testLTOFIntegration.cxx @@ -33,8 +33,8 @@ BOOST_AUTO_TEST_CASE(TrackLTIntegral) const int nStep = 100; const float dx2x0 = 0.01f; for (int i = 0; i < nStep; i++) { - lt.addStep(1., trc.getP2Inv()); - lt1.addStep(1., trc1.getP2Inv()); + lt.addStep(1., trc.getQ2P2()); + lt1.addStep(1., trc1.getQ2P2()); lt1.addX2X0(dx2x0); } trc.printParam(); diff --git a/Detectors/Base/src/Propagator.cxx b/Detectors/Base/src/Propagator.cxx index c7c7b461034e5..754c0c14e6f60 100644 --- a/Detectors/Base/src/Propagator.cxx +++ b/Detectors/Base/src/Propagator.cxx @@ -189,14 +189,14 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackParCov_t& track, va res = false; } if (tofInfo) { - tofInfo->addStep(mb.length, track.getP2Inv()); // fill L,ToF info using already calculated step length + tofInfo->addStep(mb.length, track.getQ2P2()); // fill L,ToF info using already calculated step length tofInfo->addX2X0(mb.meanX2X0); tofInfo->addXRho(mb.getXRho(signCorr)); } } else if (tofInfo) { // if tofInfo filling was requested w/o material correction, we need to calculate the step lenght auto xyz1 = track.getXYZGlo(); math_utils::Vector3D stepV(xyz1.X() - xyz0.X(), xyz1.Y() - xyz0.Y(), xyz1.Z() - xyz0.Z()); - tofInfo->addStep(stepV.R(), 
track.getP2Inv()); + tofInfo->addStep(stepV.R(), track.getQ2P2()); } return res; }; @@ -258,14 +258,14 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackPar_t& track, value res = false; } if (tofInfo) { - tofInfo->addStep(mb.length, track.getP2Inv()); // fill L,ToF info using already calculated step length + tofInfo->addStep(mb.length, track.getQ2P2()); // fill L,ToF info using already calculated step length tofInfo->addX2X0(mb.meanX2X0); tofInfo->addXRho(mb.getXRho(signCorr)); } } else if (tofInfo) { // if tofInfo filling was requested w/o material correction, we need to calculate the step lenght auto xyz1 = track.getXYZGlo(); math_utils::Vector3D stepV(xyz1.X() - xyz0.X(), xyz1.Y() - xyz0.Y(), xyz1.Z() - xyz0.Z()); - tofInfo->addStep(stepV.R(), track.getP2Inv()); + tofInfo->addStep(stepV.R(), track.getQ2P2()); } return res; }; @@ -324,14 +324,14 @@ GPUd() bool PropagatorImpl::propagateToX(TrackParCov_t& track, value_ty res = false; } if (tofInfo) { - tofInfo->addStep(mb.length, track.getP2Inv()); // fill L,ToF info using already calculated step length + tofInfo->addStep(mb.length, track.getQ2P2()); // fill L,ToF info using already calculated step length tofInfo->addX2X0(mb.meanX2X0); tofInfo->addXRho(mb.getXRho(signCorr)); } } else if (tofInfo) { // if tofInfo filling was requested w/o material correction, we need to calculate the step lenght auto xyz1 = track.getXYZGlo(); math_utils::Vector3D stepV(xyz1.X() - xyz0.X(), xyz1.Y() - xyz0.Y(), xyz1.Z() - xyz0.Z()); - tofInfo->addStep(stepV.R(), track.getP2Inv()); + tofInfo->addStep(stepV.R(), track.getQ2P2()); } return res; }; @@ -390,14 +390,14 @@ GPUd() bool PropagatorImpl::propagateToX(TrackPar_t& track, value_type res = false; } if (tofInfo) { - tofInfo->addStep(mb.length, track.getP2Inv()); // fill L,ToF info using already calculated step length + tofInfo->addStep(mb.length, track.getQ2P2()); // fill L,ToF info using already calculated step length tofInfo->addX2X0(mb.meanX2X0); 
tofInfo->addXRho(mb.getXRho(signCorr)); } } else if (tofInfo) { // if tofInfo filling was requested w/o material correction, we need to calculate the step lenght auto xyz1 = track.getXYZGlo(); math_utils::Vector3D stepV(xyz1.X() - xyz0.X(), xyz1.Y() - xyz0.Y(), xyz1.Z() - xyz0.Z()); - tofInfo->addStep(stepV.R(), track.getP2Inv()); + tofInfo->addStep(stepV.R(), track.getQ2P2()); } return res; }; @@ -717,7 +717,7 @@ GPUd() value_T PropagatorImpl::estimateLTFast(o2::track::TrackLTIntegra // since we assume the track or its parent comes from the beam-line or decay, add XY(?) distance to it value_T dcaT = math_utils::detail::sqrt(xdca * xdca + ydca * ydca); length += dcaT; - lt.addStep(length, trc.getP2Inv()); + lt.addStep(length, trc.getQ2P2()); return dcaT; } diff --git a/Detectors/GlobalTracking/src/MatchTPCITS.cxx b/Detectors/GlobalTracking/src/MatchTPCITS.cxx index f689caed87351..c8c9dda6a4025 100644 --- a/Detectors/GlobalTracking/src/MatchTPCITS.cxx +++ b/Detectors/GlobalTracking/src/MatchTPCITS.cxx @@ -1714,7 +1714,7 @@ bool MatchTPCITS::refitTrackTPCITS(int slot, int iTPC, int& iITS, pmr::vectorestimateLTIncrement(tracOut, posStart, posEnd); - tofL.addStep(lInt, tracOut.getP2Inv()); + tofL.addStep(lInt, tracOut.getQ2P2()); tofL.addX2X0(lInt * mTPCmeanX0Inv); propagator->PropagateToXBxByBz(tracOut, o2::constants::geom::XTPCOuterRef, MaxSnp, 10., mUseMatCorrFlag, &tofL); @@ -1804,7 +1804,7 @@ bool MatchTPCITS::refitABTrack(int iITSAB, const TPCABSeed& seed, pmr::vectorestimateLTIncrement(tracOut, posStart, posEnd); - tofL.addStep(lInt, tracOut.getP2Inv()); + tofL.addStep(lInt, tracOut.getQ2P2()); tofL.addX2X0(lInt * mTPCmeanX0Inv); propagator->PropagateToXBxByBz(tracOut, o2::constants::geom::XTPCOuterRef, MaxSnp, 10., mUseMatCorrFlag, &tofL); const auto& trackTune = TrackTuneParams::Instance(); diff --git a/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx b/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx index 375fa732007cc..b5a1530e83d8c 100644 --- 
a/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx +++ b/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx @@ -638,7 +638,7 @@ bool TRDGlobalTracking::refitITSTPCTRDTrack(TrackTRD& trk, float timeTRD, o2::gl } auto posEnd = trk.getXYZGlo(); auto lInt = propagator->estimateLTIncrement(trk, posStart, posEnd); - trk.getLTIntegralOut().addStep(lInt, trk.getP2Inv()); + trk.getLTIntegralOut().addStep(lInt, trk.getQ2P2()); // trk.getLTIntegralOut().addX2X0(lInt * mTPCmeanX0Inv); // do we need to account for the material budget here? probably const auto& trackTune = TrackTuneParams::Instance(); @@ -733,7 +733,7 @@ bool TRDGlobalTracking::refitTPCTRDTrack(TrackTRD& trk, float timeTRD, o2::globa } auto posEnd = trk.getXYZGlo(); auto lInt = propagator->estimateLTIncrement(trk, posStart, posEnd); - trk.getLTIntegralOut().addStep(lInt, trk.getP2Inv()); + trk.getLTIntegralOut().addStep(lInt, trk.getQ2P2()); // trk.getLTIntegralOut().addX2X0(lInt * mTPCmeanX0Inv); // do we need to account for the material budget here? probably? 
if (!propagator->PropagateToXBxByBz(trk, o2::constants::geom::XTPCInnerRef, o2::base::Propagator::MAX_SIN_PHI, o2::base::Propagator::MAX_STEP, matCorr, &trk.getLTIntegralOut())) { From d56140caadf9045d58f85836bc9a0faed6a1a4c4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 11:16:19 +0200 Subject: [PATCH 0296/1764] GPU CMake: some cleanup / simplification --- GPU/Common/CMakeLists.txt | 6 +--- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 3 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +-- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 4 +-- GPU/GPUTracking/CMakeLists.txt | 33 +++++++++---------- GPU/GPUTracking/Interface/CMakeLists.txt | 2 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 1 + .../Standalone/tools/dumpGPUDefParam.C | 2 +- GPU/GPUTracking/display/CMakeLists.txt | 4 +-- GPU/TPCFastTransformation/CMakeLists.txt | 6 ++-- GPU/Utils/CMakeLists.txt | 3 +- 11 files changed, 32 insertions(+), 36 deletions(-) diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 6951c3b2339b8..b1a4b2107019c 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -62,10 +62,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") COMPONENT_NAME GPU LABELS gpu) endif() - install(FILES ${HDRS_INSTALL} DESTINATION include/GPU) endif() -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - install(FILES ${HDRS_INSTALL} - DESTINATION include) -endif() +install(FILES ${HDRS_INSTALL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index dbdf6b606df18..c31dd0c8d3fe2 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -120,7 +120,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${CMAKE_SOURCE_DIR}/DataFormats/Reconstruction/src ${CMAKE_CURRENT_SOURCE_DIR} TARGETVARNAME targetName) - install(FILES ${HDRS} DESTINATION include/GPU) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -132,6 +131,8 @@ if(ALIGPU_BUILD_TYPE 
STREQUAL "Standalone") include_directories(${CMAKE_CURRENT_SOURCE_DIR}) endif() +install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) + target_compile_definitions(${targetName} PRIVATE $) if (onnxruntime_FOUND) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 4689fee02d31e..6eded3499e46e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -169,8 +169,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${GPUCA_HIP_SOURCE_DIR} TARGETVARNAME targetName) - install(FILES ${HDRS} DESTINATION include/GPU) - # o2_add_test(GPUsortHIP NAME test_GPUsortHIP # SOURCES test/testGPUsortHIP.hip # PUBLIC_LINK_LIBRARIES O2::GPUCommon hip::host hip::device hip::hipcub roc::rocthrust @@ -187,6 +185,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") include_directories(${GPUCA_HIP_SOURCE_DIR}) endif() +install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) + target_compile_definitions(${targetName} PRIVATE $) if (onnxruntime_FOUND) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 7ab70553958ef..99ec36615a1d1 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -96,8 +96,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") # the compile_defitions are not propagated automatically on purpose (they are # declared PRIVATE) so we are not leaking them outside of the GPU** # directories - - install(FILES ${HDRS} DESTINATION include/GPU) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -107,6 +105,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName ${MODULE}) endif() +install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) + if(OPENCL_ENABLED_SPIRV) target_compile_definitions(${targetName} PRIVATE OPENCL_ENABLED_SPIRV) endif() diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 4c1de17025627..ec60d41484f81 100644 --- 
a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -242,12 +242,13 @@ set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h Definitions/GPUDefParametersLoad.template.inc) set(GENERATED_HEADERS_LIST "") -file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) +set(ON_THE_FLY_DIR ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) +file(MAKE_DIRECTORY ${ON_THE_FLY_DIR}) foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST}) get_filename_component(OUTPUT_FILE_NAME ${TEMPLATE_FILE} NAME) string(REPLACE ".template" "" OUTPUT_FILE_NAME ${OUTPUT_FILE_NAME}) - file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE}) - list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME}) + file(GENERATE OUTPUT ${ON_THE_FLY_DIR}/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE}) + list(APPEND GENERATED_HEADERS_LIST ${ON_THE_FLY_DIR}/${OUTPUT_FILE_NAME}) endforeach() set(GPUDEFPARAMETERSLBLIST "$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,LB_>,\n>\n") string(APPEND GPUDEFPARAMETERSLBLIST "$,PREPEND,PAR_>,\n>\n") @@ -256,14 +257,14 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase CONTENT ${GPUDEFPARAMETERSLBLIST}) add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h - COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h + OUTPUT ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h + COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h COMMENT "Generating GPUDefParametersLoadPrepare.h" DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase VERBATIM 
COMMAND_EXPAND_LISTS ) -list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) +list(APPEND GENERATED_HEADERS_LIST ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h) set(HDRS_INSTALL ${HDRS_INSTALL} ${GENERATED_HEADERS_LIST}) include(kernels.cmake) @@ -306,7 +307,7 @@ set(INCDIRS ${CMAKE_CURRENT_SOURCE_DIR}/Debug ${CMAKE_CURRENT_SOURCE_DIR}/DataCompression ${CMAKE_CURRENT_SOURCE_DIR}/TPCClusterFinder - ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) + ${ON_THE_FLY_DIR}) # Main CMake part for O2 if(ALIGPU_BUILD_TYPE STREQUAL "O2") @@ -356,12 +357,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL} LINKDEF GPUTrackingLinkDef_O2.h) - install(FILES ${HDRS_SRCS} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_INSTALL} - DESTINATION include/GPU) - install(DIRECTORY utils - DESTINATION include/GPU - FILES_MATCHING PATTERN *.h) - o2_add_test_root_macro(Standalone/tools/createGeo.C PUBLIC_LINK_LIBRARIES O2::GPUTracking LABELS its COMPILE_ONLY) @@ -390,13 +385,15 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") else() target_compile_definitions(${targetName} PRIVATE GPUCA_NO_ROOT) endif() - install(FILES ${HDRS_SRCS} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_INSTALL} - DESTINATION include) - install(DIRECTORY utils - DESTINATION include - FILES_MATCHING PATTERN *.h) endif() +install(FILES ${HDRS_SRCS} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_INSTALL} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) +install(DIRECTORY utils + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU + FILES_MATCHING PATTERN *.h) + + # GPUReconstructionLibrary needs to know which GPU backends are enabled for proper error messages configure_file(Base/GPUReconstructionAvailableBackends.template.h ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionAvailableBackends.h) set_source_files_properties(Base/GPUReconstructionLibrary.cxx diff --git a/GPU/GPUTracking/Interface/CMakeLists.txt 
b/GPU/GPUTracking/Interface/CMakeLists.txt index baec5b941b4b6..e532b24f97908 100644 --- a/GPU/GPUTracking/Interface/CMakeLists.txt +++ b/GPU/GPUTracking/Interface/CMakeLists.txt @@ -46,4 +46,4 @@ target_include_directories(${targetName} target_compile_definitions(${targetName} PRIVATE $) -install(FILES ${HDRS} DESTINATION include/GPU) +install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 0859223187f00..088269ae73cab 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR) project(GPUTrackingStandalone) include(FeatureSummary) +include(GNUInstallDirs) set(CMAKE_INSTALL_MESSAGE LAZY) set(CMAKE_INSTALL_BINDIR "${CMAKE_INSTALL_PREFIX}") diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index 785c049816252..dcb12db6c9118 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -13,7 +13,7 @@ /// \author David Rohr // Run e.g. 
as: -// ROOT_INCLUDE_PATH="`pwd`/include" root -l -q -b src/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C'()' +// ROOT_INCLUDE_PATH="`pwd`/include/GPU" root -l -q -b src/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C'()' // Logic for testing to load the default parameters /* #define GPUCA_GPUCODE diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index 592ba3b38ff30..d7d08f7f06101 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -133,8 +133,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") target_compile_definitions(${targetName} PRIVATE GPUCA_BUILD_EVENT_DISPLAY_GLFW GPUCA_DISPLAY_GL3W GPUCA_DISPLAY_OPENGL_CORE) - install(FILES ${HDRS} ${HDRS_INSTALL} DESTINATION include/GPU) - o2_add_executable(field-uniform-exporter TARGETVARNAME exporterName COMPONENT_NAME gpu @@ -157,6 +155,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") install(FILES ${CMAKE_CURRENT_BINARY_DIR}/setinclude.sh PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE DESTINATION displayTrackFilter) endif() +install(FILES ${HDRS} ${HDRS_INSTALL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) + target_compile_definitions(${targetName} PRIVATE $) message(STATUS "Building GPU Event Display (Vulkan ${GPUCA_EVENT_DISPLAY_VULKAN}, Wayland ${GPUCA_EVENT_DISPLAY_WAYLAND}, Freetype ${GPUCA_EVENT_DISPLAY_FREETYPE}, Fontconfig ${Fontconfig_FOUND}, Qt ${GPUCA_EVENT_DISPLAY_QT})") diff --git a/GPU/TPCFastTransformation/CMakeLists.txt b/GPU/TPCFastTransformation/CMakeLists.txt index c7869467d15b5..317169c05f731 100644 --- a/GPU/TPCFastTransformation/CMakeLists.txt +++ b/GPU/TPCFastTransformation/CMakeLists.txt @@ -64,7 +64,6 @@ if(${ALIGPU_BUILD_TYPE} STREQUAL "O2") HEADERS ${HDRS_CINT_O2} LINKDEF TPCFastTransformationLinkDef_O2.h) - install(FILES ${HDRS_CINT_O2} DESTINATION include/GPU) file(COPY ${HDRS_CINT_O2} DESTINATION ${CMAKE_BINARY_DIR}/stage/include/GPU) o2_add_test(${MODULE} @@ -115,6 +114,8 
@@ if(${ALIGPU_BUILD_TYPE} STREQUAL "O2") LABELS gpu tpc) endforeach() + install (FILES macro/TPCFastTransformInit.C + DESTINATION share/macro/) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -123,8 +124,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") target_include_directories(${targetName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) endif() -install (FILES macro/TPCFastTransformInit.C - DESTINATION share/macro/) +install(FILES ${HDRS_CINT_O2} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) # # FIXME: this one is misplaced : it depends (at least) on TPCSimulation which is diff --git a/GPU/Utils/CMakeLists.txt b/GPU/Utils/CMakeLists.txt index bf23c792c2034..5ea8b59db5cad 100644 --- a/GPU/Utils/CMakeLists.txt +++ b/GPU/Utils/CMakeLists.txt @@ -31,5 +31,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") HEADERS ${HDRS_CINT} LINKDEF GPUUtilsLinkDef.h) - install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION include/GPU) endif() + +install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) From 68c73f4ca4c851d3542be7acbd217f3fd68a4127 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 11:17:39 +0200 Subject: [PATCH 0297/1764] GPU CMake: Create const param files for available default architectures to be loaded by RTC --- GPU/GPUTracking/CMakeLists.txt | 24 +++++++++++++++++ .../Definitions/GPUDefParametersDefaults.h | 11 ++++---- GPU/GPUTracking/Standalone/CMakeLists.txt | 1 + .../Standalone/tools/dumpGPUDefParam.C | 27 +++++++++---------- GPU/GPUTracking/display/CMakeLists.txt | 4 +-- GPU/TPCFastTransformation/CMakeLists.txt | 4 +-- 6 files changed, 47 insertions(+), 24 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index ec60d41484f81..b2852389398d0 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -407,6 +407,30 @@ target_sources(${targetName} FILES ${GENERATED_HEADERS_LIST} BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}) 
+make_directory(${CMAKE_CURRENT_BINARY_DIR}/genGPUArch) +set(GPU_CONST_PARAM_ARCHITECTUES AMPERE TURING VEGA MI2xx) +set(GPU_CONST_PARAM_FILES "") +foreach(GPU_ARCH ${GPU_CONST_PARAM_ARCHITECTUES}) + set(PARAMFILE ${CMAKE_CURRENT_BINARY_DIR}/genGPUArch/gpu_const_param_${GPU_ARCH}.par) + add_custom_command( + OUTPUT ${PARAMFILE} + COMMAND bash -c + "echo -e '#define GPUCA_GPUTYPE_${GPU_ARCH}\\n#define PARAMETER_FILE \"GPUDefParametersDefaults.h\"\\ngInterpreter->AddIncludePath(\"${CMAKE_CURRENT_SOURCE_DIR}/Definitions\");\\ngInterpreter->AddIncludePath(\"${ON_THE_FLY_DIR}\");\\n.x ${CMAKE_CURRENT_SOURCE_DIR}/Standalone/tools/dumpGPUDefParam.C(\"${PARAMFILE}\")\\n.q\\n'" + | root -l -b > /dev/null + VERBATIM + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/genGPUArch + MAIN_DEPENDENCY Standalone/tools/dumpGPUDefParam.C + DEPENDS Definitions/GPUDefParametersDefaults.h + ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h + ${ON_THE_FLY_DIR}/GPUDefParametersLoad.inc + COMMENT "Generating GPU parameter set for architecture ${GPU_ARCH}") + LIST(APPEND GPU_CONST_PARAM_FILES ${PARAMFILE}) +endforeach() + +add_custom_target(${MODULE}_GPU_CONST_PARAM_ARCHS ALL DEPENDS ${GPU_CONST_PARAM_FILES}) +install(FILES ${GPU_CONST_PARAM_FILES} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/GPU/arch_param) + + # Add compile definitions and libraries depending on available optional dependencies if(GPUCA_QA) message(STATUS "Building GPU QA") diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index a56fb97771fe5..cdc5efd56ddfd 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -22,6 +22,7 @@ // GPU Run Configuration #if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
+ #define GPUCA_LB_SCAN 512 // GPU-architecture-dependent default settings #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 @@ -498,11 +499,11 @@ #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_LB_SCAN #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 088269ae73cab..c112be6abac11 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -242,3 +242,4 @@ install(TARGETS ca TPCFastTransformation standalone_support) install(FILES "cmake/makefile" DESTINATION "${CMAKE_INSTALL_PREFIX}") install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${O2_DIR} ${CMAKE_INSTALL_PREFIX}/src)") install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_BINARY_DIR}/config.cmake ${CMAKE_INSTALL_PREFIX}/config.cmake)") 
+install(DIRECTORY tools DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/GPU) diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index dcb12db6c9118..f82c537956ead 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -12,27 +12,24 @@ /// \file dumpGPUDefParam.C /// \author David Rohr -// Run e.g. as: -// ROOT_INCLUDE_PATH="`pwd`/include/GPU" root -l -q -b src/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C'()' +// Run e.g. as (replacing [FILE] and [OUTPUT]: +// echo -e '#define PARAMETER_FILE "[FILE]]"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("[OUTPUT]")\n.q\n' | root -l -b +// To dump the defaults for AMPERE architecture, run +// echo -e '#define GPUCA_GPUTYPE_AMPERE\n#define PARAMETER_FILE "GPUDefParametersDefaults.h"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("default_AMPERE.par")\n.q\n' | root -l -b -// Logic for testing to load the default parameters -/* #define GPUCA_GPUCODE -#define GPUCA_GPUTYPE_AMPERE -#define GPUCA_MAXN 40 -#define GPUCA_ROW_COUNT 152 -#define GPUCA_TPC_COMP_CHUNK_SIZE 1024 -#include "GPUDefParametersConstants.h" -#include "GPUDefParametersDefaults.h" */ +#ifndef PARAMETER_FILE +#error Must provide the PARAMETER_FILE as preprocessor define, e.g. 
-DHEADER_TO_INCLUDE='"GPUDefParametersDefaults.h"' +#endif -// Alternatively, logic to load file that sets GPUDefParameters -#include "testParam.h" +#define GPUCA_GPUCODE +#include PARAMETER_FILE #include "GPUDefParametersLoad.inc" -void dumpGPUDefParam() +void dumpGPUDefParam(const char* outputfile = "parameters.out") { auto param = o2::gpu::internal::GPUDefParametersLoad(); - printf("Loaded params:\n%s", o2::gpu::internal::GPUDefParametersExport(param, false).c_str()); - FILE* fp = fopen("parameters.out", "w+b"); + printf("Loaded params:\n%s\nWriting them to %s\n", o2::gpu::internal::GPUDefParametersExport(param, false).c_str(), outputfile); + FILE* fp = fopen(outputfile, "w+b"); fwrite(&param, 1, sizeof(param), fp); fclose(fp); } diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index d7d08f7f06101..a59d5189d6235 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -149,10 +149,10 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") target_link_libraries(${targetName} PUBLIC O2::GPUTracking) install(TARGETS ${MODULE}) - install(DIRECTORY filterMacros/ DESTINATION displayTrackFilter FILES_MATCHING PATTERN "*.C") + install(DIRECTORY filterMacros/ DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/GPU/displayTrackFilter FILES_MATCHING PATTERN "*.C") get_property(GPU_DISPLAY_INCLUDE_PATH DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) configure_file(filterMacros/setinclude.sh.in setinclude.sh @ONLY) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/setinclude.sh PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE DESTINATION displayTrackFilter) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/setinclude.sh PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/GPU/displayTrackFilter) endif() install(FILES ${HDRS} ${HDRS_INSTALL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) diff --git 
a/GPU/TPCFastTransformation/CMakeLists.txt b/GPU/TPCFastTransformation/CMakeLists.txt index 317169c05f731..182a66fb28296 100644 --- a/GPU/TPCFastTransformation/CMakeLists.txt +++ b/GPU/TPCFastTransformation/CMakeLists.txt @@ -114,8 +114,8 @@ if(${ALIGPU_BUILD_TYPE} STREQUAL "O2") LABELS gpu tpc) endforeach() - install (FILES macro/TPCFastTransformInit.C - DESTINATION share/macro/) + install(FILES macro/TPCFastTransformInit.C + DESTINATION share/macro/) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") From 3684fcc3d08b87fd2a0ff2d9077586a15d191e77 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 13:59:44 +0200 Subject: [PATCH 0298/1764] GPU: Remove obsolete .gitignore files --- GPU/GPUTracking/.gitignore | 1 - GPU/GPUTracking/Standalone/.gitignore | 5 ----- GPU/GPUTracking/Standalone/tools/.gitignore | 5 ----- GPU/GPUTracking/utils/.gitignore | 17 ----------------- 4 files changed, 28 deletions(-) delete mode 100644 GPU/GPUTracking/.gitignore delete mode 100644 GPU/GPUTracking/Standalone/.gitignore delete mode 100644 GPU/GPUTracking/Standalone/tools/.gitignore delete mode 100644 GPU/GPUTracking/utils/.gitignore diff --git a/GPU/GPUTracking/.gitignore b/GPU/GPUTracking/.gitignore deleted file mode 100644 index 7669bc79c7059..0000000000000 --- a/GPU/GPUTracking/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/utils/.svn diff --git a/GPU/GPUTracking/Standalone/.gitignore b/GPU/GPUTracking/Standalone/.gitignore deleted file mode 100644 index d0d3c34e96452..0000000000000 --- a/GPU/GPUTracking/Standalone/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -/config_options.mak -/release -/ca -/ca.exe -/libGPUTracking*.so diff --git a/GPU/GPUTracking/Standalone/tools/.gitignore b/GPU/GPUTracking/Standalone/tools/.gitignore deleted file mode 100644 index 6ddce7f156a23..0000000000000 --- a/GPU/GPUTracking/Standalone/tools/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.o -*.bc -*.cl -*.spv -*.spirv diff --git a/GPU/GPUTracking/utils/.gitignore b/GPU/GPUTracking/utils/.gitignore 
deleted file mode 100644 index ff145e23751f6..0000000000000 --- a/GPU/GPUTracking/utils/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -/get_private_profile.h -/os_low_level_helper.h -/affinity.cxx -/affinity.h -/qmath.h -/qmultialloc.* -/qmalloc.* -/sched_affinity_win32_wrapper.h -/switchtemplate.h -/util_adl.cxx -/util_adl.h -/vecpodtest.cxx -/*.cpp -/*.sh -/.svn -/as -/callvc.bat From 32319712858a1882e8826a7e62129ac619a548f9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 22:45:14 +0200 Subject: [PATCH 0299/1764] Update / add documentation for FST --- .../documentation/dpl-workflow-options.md | 55 ++++++++ .../documentation/env-variables.md | 51 +++++++ .../full-system-test-as-stress-test.md | 33 +++++ .../documentation/full-system-test-setup.md | 124 ++++++++++++++++++ .../full-system-test.md} | 6 +- .../documentation/raw-data-simulation.md | 43 ++++++ 6 files changed, 309 insertions(+), 3 deletions(-) create mode 100644 prodtests/full-system-test/documentation/dpl-workflow-options.md create mode 100644 prodtests/full-system-test/documentation/env-variables.md create mode 100644 prodtests/full-system-test/documentation/full-system-test-as-stress-test.md create mode 100644 prodtests/full-system-test/documentation/full-system-test-setup.md rename prodtests/full-system-test/{README.md => documentation/full-system-test.md} (95%) create mode 100644 prodtests/full-system-test/documentation/raw-data-simulation.md diff --git a/prodtests/full-system-test/documentation/dpl-workflow-options.md b/prodtests/full-system-test/documentation/dpl-workflow-options.md new file mode 100644 index 0000000000000..f79e481ce0723 --- /dev/null +++ b/prodtests/full-system-test/documentation/dpl-workflow-options.md @@ -0,0 +1,55 @@ +# Configuration options +You can use the following options to change the workflow behavior: +- `DDMODE` (default `processing`) : Must be `processing` (synchronous processing) or `processing-disk` (synchronous processing + storing of raw time frames 
to disk, note that this is the raw time frame not the CTF!). The `DDMODE` `discard` and `disk` are not compatible with the synchronous processing workflow, you must use the `no-processing.desc` workflow instead!. +- `WORKFLOW_DETECTORS` (default `ALL`) : Comma-separated list of detectors for which the processing is enabled. If these are less detectors than participating in the run, data of the other detectors is ignored. If these are more detectors than participating in the run, the processes for the additional detectors will be started but will not do anything. +- `WORKFLOW_DETECTORS_QC` (default `ALL`) : Comma-separated list of detectors for which to run QC, can be a subset of `WORKFLOW_DETECTORS` (for standalone detectors QC) and `WORKFLOW_DETECTORS_MATCHING` (for matching/vertexing QC). If a detector (matching/vertexing step) is not listed in `WORKFLOW_DETECTORS` (`WORKFLOW_DETECTORS_MATCHING`), the QC is automatically disabled for that detector. Only active if the `WORKFLOW_PARAMETER=QC` is set. +- `WORKFLOW_DETECTORS_CALIB` (default `ALL`) : Comma-separated list of detectors for which to run calibration, can be a subset of `WORKFLOW_DETECTORS`. If a detector is not listed in `WORKFLOW_DETECTORS`, the calibration is automatically disabled for that detector. Only active if the `WORKFLOW_PARAMETER=CALIB` is set. +- `WORKFLOW_DETECTORS_FLP_PROCESSING` (default `TOF` for sync processing on EPN, `NONE` otherwise) : Signals that these detectors have processing on the FLP enabled. The corresponding steps are thus inactive in the EPN epl-workflow, and the raw-proxy is configured to receive the FLP-processed data instead of the raw data in that case. +- `WORKFLOW_DETECTORS_RECO` (default `ALL`) : Comma-separated list of detectors for which to run reconstruction. +- `WORKFLOW_DETECTORS_CTF` (default `ALL`) : Comma-separated list of detectors to include in CTF. 
+- `WORKFLOW_DETECTORS_MATCHING` (default selected corresponding to default workflow for sync or async mode respectively) : Comma-separated list of matching / vertexing algorithms to run. Use `ALL` to enable all of them. Currently supported options (see LIST_OF_GLORECO in common/setenv.h): `ITSTPC`, `TPCTRD`, `ITSTPCTRD`, `TPCTOF`, `ITSTPCTOF`, `MFTMCH`, `PRIMVTX`, `SECVTX`. +- `WORKFLOW_EXTRA_PROCESSING_STEPS` Enable additional processing steps not in the preset for the SYNC / ASYNC mode. Possible values are: `MID_RECO` `MCH_RECO` `MFT_RECO` `FDD_RECO` `FV0_RECO` `ZDC_RECO` `ENTROPY_ENCODER` `MATCH_ITSTPC` `MATCH_TPCTRD` `MATCH_ITSTPCTRD` `MATCH_TPCTOF` `MATCH_ITSTPCTOF` `MATCH_MFTMCH` `MATCH_MFTMCH` `MATCH_PRIMVTX` `MATCH_SECVTX`. (Here `_RECO` means full async reconstruction, and can be used to enable it also in sync mode.) +- `WORKFLOW_PARAMETERS` (default `NONE`) : Comma-separated list, enables additional features of the workflow. Currently the following features are available: + - `GPU` : Performs the TPC processing on the GPU, otherwise everything is processed on the CPU. + - `CTF` : Write the CTF to disk (CTF creation is always enabled, but if this parameter is missing, it is not stored). + - `EVENT_DISPLAY` : Enable JSON export for event display. + - `QC` : Enable QC. + - `CALIB` : Enable calibration (not yet working!) +- `RECO_NUM_NODES_OVERRIDE` (default `0`) : Overrides the number of EPN nodes used for the reconstruction (`0` or empty means default). +- `MULTIPLICITY_FACTOR_RAWDECODERS` (default `1`) : Scales the number of parallel processes used for raw decoding by this factor. +- `MULTIPLICITY_FACTOR_CTFENCODERS` (default `1`) : Scales the number of parallel processes used for CTF encoding by this factor. +- `MULTIPLICITY_FACTOR_REST` (default `1`) : Scales the number of other reconstruction processes by this factor. 
+- `QC_JSON_EXTRA` (default `NONE`) : extra QC jsons to add (if does not fit to those defined in WORKFLOW_DETECTORS_QC & (WORKFLOW_DETECTORS | WORKFLOW_DETECTORS_MATCHING) +Most of these settings are configurable in the AliECS GUI. But some of the uncommon settings (`WORKFLOW_DETECTORS_FLP_PROCESSING`, `WORKFLOW_DETECTORS_CTF`, `WORKFLOW_DETECTORS_RECO`, `WORKFLOW_DETECTORS_MATCHING`, `WORKFLOW_EXTRA_PROCESSING_STEPS`, advanced `MULTIPLICITY_FACTOR` settings) can only be set via the "Additional environment variables field" in the GUI using bash syntax, e.g. `WORKFLOW_DETECTORS_FLP_PROCESSING=TPC`. + +# Process multiplicity factors +- The production workflow has internally a default value how many instances of a process to run in parallel (which was tuned for Pb-Pb processing) +- Some critical processes for synchronous pp processing are automatically scaled by the inverse of the number of nodes, i.e. the multiplicity is increased by a factor of 2 if 125 instead of 250 nodes are used, to enable the processing using only a subset of the nodes. +- Factors can be provided externally to scale the multiplicity of processes further. All these factors are multiplied. + - One factor can be provided based on the type of the processes: raw decoder (`MULTIPLICITY_FACTOR_RAWDECODERS`), CTF encoder (`MULTIPLICITY_FACTOR_CTFENCODERS`), or other reconstruction process (`MULTIPLICITY_FACTOR_REST`) + - One factor can be provided per detector via `MULTIPLICITY_FACTOR_DETECTOR_[DET]` using the 3 character detector representation, or `MATCH` for the global matching and vertexing workflows. + - One factor can be provided per process via `MULTIPLICITY_FACTOR_PROCESS_[PROCESS_NAME]`. In the process name, dashes `-` must be replaced by underscores `_`. +- The multiplicity of an individual process can be overridden externally (this is an override, no scaling factor) by using `MULTIPLICITY_PROCESS_[PROCESS_NAME]`. In the process name, dashes `-` must be replaced by underscores `_`. 
+- For example, creating the workflow with `MULTIPLICITY_FACTOR_RAWDECODERS=2 MULTIPLICITY_FACTOR_DETECTOR_ITS=3 MULTIPLICITY_FACTOR_PROCESS_mft_stf_decoder=5` will scale the number of ITS raw decoders by 6, of other ITS processes by 3, of other raw decoders by 2, and will run exactly 5 `mft-stf-decoder` processes. + +# Additional custom control variables +For user modification of the workflow settings, the following *EXTRA* environment variables exist: +- `ARGS_ALL_EXTRA` : Extra command line options added to all workflows +- `ALL_EXTRA_CONFIG` : Extra config key values added to all workflows +- `GPU_EXTRA_CONFIG` : Extra options added to the configKeyValues of the GPU workflow +- `ARGS_EXTRA_PROCESS_[WORKFLOW_NAME]` : Extra command line arguments for the workflow binary `WORKFLOW_NAME`. Dashes `-` must be replaced by underscores `_` in the name! E.g. `ARGS_EXTRA_PROCESS_o2_tof_reco_workflow='--output-type clusters'` +- `CONFIG_EXTRA_PROCESS_[WORKFLOW_NAME]` : Extra `--configKeyValues` arguments for the workflow binary `WORKFLOW_NAME`. Dashes `-` must be replaced by underscores `_` in the name! E.g. `CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow='GPU_proc.debugLevel=1;GPU_proc.ompKernels=0;'` + +**IMPORTANT:** When providing additional environment variables please always use single quotes `'` instead of double quotes `"`, because otherwise there can be issues with whitespaces. E.g. `ARGS_EXTRA_PROCESS_o2_eve_display='--filter-time-min 0 --filter-time-max 120'` does work while `ARGS_EXTRA_PROCESS_o2_eve_display="--filter-time-min 0 --filter-time-max 120"` does not. + +In case the CTF dictionaries were created from the data drastically different from the one being compressed, the default memory allocation for the CTF buffer might be insufficient. One can apply a scaling factor to the buffer size estimate (default=1.5) of a particular detector by defining a variable e.g. 
`TPC_ENC_MEMFACT=3.5` + +# File input for ctf-reader / raw-tf-reader +- The variable `$INPUT_FILE_LIST` can be a comma-separated list of files, or a file with a file-list of CTFs/raw TFs. +- The variable `$INPUT_FILE_COPY_CMD` can provide a custom copy command (default is to fetch the files from EOS). + +# Remarks on QC +The JSON files for the individual detectors are merged into one JSON file, which is cached during the run on the shared EPN home folder. +The default JSON file per detector is defined in `qc-workflow.sh`. +JSONs per detector can be overridden by exporting `QC_JSON_[DETECTOR_NAME]`, e.g. `QC_JSON_TPC`, when creating the workflow. +The global section of the merged qc JSON config is taken from qc-sync/qc-global.json diff --git a/prodtests/full-system-test/documentation/env-variables.md b/prodtests/full-system-test/documentation/env-variables.md new file mode 100644 index 0000000000000..b93622c0a0f94 --- /dev/null +++ b/prodtests/full-system-test/documentation/env-variables.md @@ -0,0 +1,51 @@ +The `setenv-sh` script sets the following environment options +* `NTIMEFRAMES`: Number of time frames to process. +* `TFDELAY`: Delay in seconds between publishing time frames (1 / rate). +* `NGPUS`: Number of GPUs to use, data distributed round-robin. +* `GPUTYPE`: GPU Tracking backend to use, can be CPU / CUDA / HIP / OCL / OCL2. +* `SHMSIZE`: Size of the global shared memory segment. +* `DDSHMSIZE`: Size of shared memory unmanaged region for DataDistribution Input. +* `GPUMEMSIZE`: Size of allocated GPU memory (if GPUTYPE != CPU) +* `HOSTMEMSIZE`: Size of allocated host memory for GPU reconstruction (0 = default). + * For `GPUTYPE = CPU`: TPC Tracking scratch memory size. (Default 0 -> dynamic allocation.) + * Otherwise : Size of page-locked host memory for GPU processing. (Default 0 -> 1 GB.) +* `CREATECTFDICT`: Create CTF dictionary. +* `SAVECTF`: Save the CTF to a root file. + * 0: Read `ctf_dictionary.root` as input. + * 1: Create `ctf_dictionary.root`. 
Note that this was already done automatically if the raw data was simulated with `full_system_test.sh`. +* `SYNCMODE`: Run only reconstruction steps of the synchronous reconstruction. + * Note that there is no `ASYNCMODE` but instead the `CTFINPUT` option already enforces asynchronous processing. +* `NUMAGPUIDS`: NUMAID-aware GPU id selection. Needed for the full EPN configuration with 8 GPUs, 2 NUMA domains, 4 GPUs per domain. + In this configuration, 2 instances of `dpl-workflow.sh` must run in parallel. + To be used in combination with `NUMAID` to select the id per workflow. + `start_tmux.sh` will set up these variables automatically. +* `NUMAID`: SHM segment id to use for shipping data as well as set of GPUs to use (use `0` / `1` for 2 NUMA domains, 0 = GPUS `0` to `NGPUS - 1`, 1 = GPUS `NGPUS` to `2 * NGPUS - 1`) +* 0: Runs all reconstruction steps, of sync and of async reconstruction, using raw data input. +* 1: Runs only the steps of synchronous reconstruction, using raw data input. +* `EXTINPUT`: Receive input from raw FMQ channel instead of running o2-raw-file-reader. + * 0: `dpl-workflow.sh` can run as standalone benchmark, and will read the input itself. + * 1: To be used in combination with either `datadistribution.sh` or `raw-reader.sh` or with another DataDistribution instance. +* `CTFINPUT`: Read input from CTF ROOT file. This option is incompatible with EXTINPUT=1. The CTF ROOT file can be stored via SAVECTF=1. +* `NHBPERTF`: Time frame length (in HBF) +* `GLOBALDPLOPT`: Global DPL workflow options appended to o2-dpl-run. +* `EPNPIPELINES`: Set default EPN pipeline multiplicities. + Normally the workflow will start 1 dpl device per processor. + For some of the CPU parts, this is insufficient to keep step with the GPU processing rate, e.g. one ITS-TPC matcher on the CPU is slower than the TPC tracking on multiple GPUs. + This option adds some multiplicities for CPU processes using DPL's pipeline feature. 
+ The settings were tuned for EPN processing with 4 GPUs (i.e. the default multiplicities are per NUMA domain). + The multiplicities are scaled with the `NGPUS` setting, i.e. with 1 GPU only 1/4th are applied. + You can pass an option different to 1, and then it will be applied as factor on top of the multiplicities. + It is auto-selected by `start_tmux.sh`. +* `SEVERITY`: Log verbosity (e.g. info or error, default: info) +* `INFOLOGGER_SEVERITY`: Min severity for messages sent to Infologger. (default: `$SEVERITY`) +* `SHMTHROW`: Throw exception when running out of SHM memory. + It is suggested to leave this enabled (default) on tests on the laptop to get an actual error when it runs out of memory. + This is disabled in `start_tmux.sh`, to avoid breaking the processing while there is a chance that another process might free memory and we can continue. +* `NORATELOG`: Disable FairMQ Rate Logging. +* `INRAWCHANNAME`: FairMQ channel name used by the raw proxy, must match the name used by DataDistribution. +* `WORKFLOWMODE`: run (run the workflow (default)), print (print the command to stdout), dds (create partial DDS topology) +* `FILEWORKDIR`: directory for all input / output files. E.g. grp / geometry / dictionaries etc. are read from here, and dictionaries / ctf / etc. are written to there. + Some files have more fine grained control via other environment variables (e.g. to store the CTF to somewhere else). Such variables are initialized to `$FILEWORKDIR` by default but can be overridden. +* `EPNSYNCMODE`: Specify that this is a workflow running on the EPN for synchronous processing, e.g. logging goes to InfoLogger, DPL metrics go to the AliECS monitoring, etc. +* `BEAMTYPE`: Beam type, must be PbPb, pp, pPb, cosmic, technical. +* `IS_SIMULATED_DATA` : 1 for MC data, 0 for RAW data. 
diff --git a/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md b/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md new file mode 100644 index 0000000000000..0c4637ece0920 --- /dev/null +++ b/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md @@ -0,0 +1,33 @@ +This is a quick summary how to run the full system test (FST) as stress test on the EPN. (For the full FST documentation, see https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md and https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test.md) + +# Preparing the data set +- I usually try to keep an up-to-date data set that can be used in `/home/drohr/alitest/tmp-fst*`. The folder with the highest number is the latest dataset. However, data formats are still evolving, and it requires rerunning the simulation regularly. I.e. please try my latest data set, if it doesn't work, please generate a new one as described below. +- Short overview how to generate a FST Pb-Pb 128 orbit data set: + - The O2 binaries installed on the EPN via RPMs use the `o2-dataflow` defaults and cannot run the simulation, and also they lack readout. Thus you need to build `O2PDPSuite` and `Readout` (the version matching the O2PDPSuite RPM you want to use for running the test) yourself with `alibuild` on an EPN: `aliBuild --defaults o2 build O2PDPSuite Readout --jobs 32 --debug`. The flag `--jobs` configures the number of parallel jobs and can be changed. + - Enter the O2PDPSuite environment via `alienv enter O2PDPSuite/latest Readout/latest`. + - Go to an empty directory. + - Run the FST simulation via: `NEvents=650 NEventsQED=10000 SHMSIZE=128000000000 TPCTRACKERSCRATCHMEMORY=40000000000 SPLITTRDDIGI=0 GENERATE_ITSMFT_DICTIONARIES=1 $O2_ROOT/prodtests/full_system_test.sh` + - Get a current matbud.root (e.g. 
from here https://alice.its.cern.ch/jira/browse/O2-2288) and place it in that folder. + - Create a timeframe file from the raw files: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. + - Prepare the ramdisk folder: `mv raw/timeframe raw/timeframe-org; mkdir raw/timeframe-tmpfs; ln -s timeframe-tmpfs raw/timeframe` + +# Running the full system test +- Enter the environment! On an EPN do `module load O2PDPSuite` (this will load the latest O2 software installed on that EPN). +- Go into the folder with the data set (you might need to create one, see above). +- Prepare the ramdisk with the data: `sudo mount -t tmpfs tmpfs raw/timeframe-tmpfs; sudo cp raw/timeframe-org/* raw/timeframe` + - (NOTE that the ramdisk might already be present from previous tests, or in a different folder. Check the mounted tmpfs filesystems (`mount | grep tmpfs`), and don't mount multiple of them since memory is critical!) + - If you do not have root permissions and cannot create a ramdisk, the test will also work without. In that case you should decrease the publishing rate below to `TFDELAY=5`. +- Make sure disk caches are cleared: as ROOT do: `echo 1 > /proc/sys/vm/drop_caches` +- In order to run the Full System Test, the workflow must be able to access the CCDB. Normally, if you run as user, you must make sure to have an alien token present. On the EPN, one can use the EPN-internal CCDB server instead, which does not require alien access. If you use the `start-tmux.sh`, the env variables are set automatically to access the EPN-internal CCDB server. +- Start the FST with 2 NUMA domains: `TFDELAY=2.5 NTIMEFRAMES=1000000 $O2_ROOT/prodtests/full-system-test/start_tmux.sh dd` + +This will start a tmux session with 3 shells, the upper 2 shells are the 2 DPL workflows, one per NUMA domain, for the processing. The lower shell is the input with DataDistribution's StfBuilder. Leave it running and check that the StfBuilder doesn't complain that its buffer is full. 
Then the EPN can sustain the rate. + +# **NOTE** +- Attached to this ticket is a screenshot of what the console should look like: + - The DD console (on the bottom) should not show warnings about full buffers. + - The other 2 consoles (1 per NUMA domain) should show the processing times per TF for the GPU reconstruction: + ``` + [2974450:gpu-reconstruction_t3]: [10:50:38][INFO] GPU Reoncstruction time for this TF 26.77 s (cpu), 17.8823 s (wall) + ``` + This should be 17 to 18 seconds, and you should see it for all 4 GPUs on both NUMA domains (`reconstruction_t0` to `reconstruction_t3`) diff --git a/prodtests/full-system-test/documentation/full-system-test-setup.md b/prodtests/full-system-test/documentation/full-system-test-setup.md new file mode 100644 index 0000000000000..82ef9b7d0c74f --- /dev/null +++ b/prodtests/full-system-test/documentation/full-system-test-setup.md @@ -0,0 +1,124 @@ +This is some documentation for the full system test setup. + +If you just want to test a small dataset, you can skip the following steps, and just skip to the end, where you will find a download with a prepared data set! + +# Requirements: +- The FST needs a lot of memory. Please check the comments below, make sure your system has enough memory, and change the memory sizes in the command lines accordingly. +- ulimits: The FST needs large ulimits for memory and virtual memory (`ulimit -m` / `ulimit -v`). This is usually no problem since they are usually unlimited. If GPUs are used, the FST also needs `ulimit -l` (for locked memory) unlimited, which is usually not the system default. Finally, if data is replayed from raw files (not with DataDistribution), the FST will open many files, and `ulimit -n` should be at least 4096. Note that in most distributions the hard ulimits are configured in `/etc/security/limits.conf`. +- The FST needs to access the CCDB. For this, you should run the FST with an alien token. 
Alternatively, if you are on the EPN you can use the EPN-internal CCDB server by exporting `ALL_EXTRA_CONFIG="NameConf.mCCDBServer=http://o2-ccdb.internal;"` and by setting the DPL CCDB backend on the command line. If you are using `start-tmux.sh` for the 8 GPU FST, the CCDB backends are set automatically. + +# Creating the raw data and running the FST: +1. First some remarks on the number of events and the memory size: + - Generation (simulation) of the full time frame with ~550 collisions will need ~256 GB, processing will take less. + - Due to the sampling of the bunch crossings, the exact number of collisions that will be in the TF is not clear, thus one should simulate 600 collisions to generate a full 128 orbit TF. + - The default shared memory size is 2 GB, and must be increased significantly for large time frames, 128 GB is sufficient for 128 orbit TF, 160 GB is needed if MC labels are present in addition. + - The GPU memory allocation should be set to ~13 GB for 70 orbits and 21 GB for 128 orbits. + - I'd suggest to do a first small test with 1-5 events to check the machinery, 100 events is already a good size which should not exhaust the memory, I'd go to 600 only after 100 works. +1. Compile O2 with GPU support, in addition you need O2sim, DataDistribution, and Readout (latest versions from alidist will do). + GPUs for O2 should be auto-detected, but you can set the environment variables ALIBUILD_ENABLE_CUDA / ALIBUILD_ENABLE_HIP to enforce it (and get a failure when detection fails). Look for CMake log messages "Building GPUTracking with CUDA support" (etc) to verify. + For more information, see https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build.md +1. Optionally place some binary configuration files in the simulation folder. Default objects will be used if no such files are placed. There are instructions at the end of this post how to generate these files. 
(Currently, these files are: matbud.root, ITSdictionary.bin, ctf_dictionary.root, tpctransform.root, dedxsplines.root, and tpcpadgaincalib.root) +1. Load the O2sim environment (`alienv enter O2sim/latest`) and run the following full system test script for a full simulation and digits to raw conversion (this will already include 1 CPU reconstruction run): + ``` + NEvents=600 NEventsQED=35000 SHMSIZE=128000000000 TPCTRACKERSCRATCHMEMORY=30000000000 $O2_ROOT/prodtests/full_system_test.sh + ``` + - This creates a full 128 orbit TF with 550 collisions and uses 35000 interactions for the QED background + - It uses 128 GB of shared memory + - The scratch memory size for the TPC reconstruction is set to 24 GB (Note, this is the CPU-equivalent of the GPU memory size, since this phase will only run on the CPU). +1. Test of the workflow using the raw-file-reader: Run the so far largest workflow. The GPU and SHM memory sizes must be reasonably large (see above). + ``` + SHMSIZE=128000000000 NTIMEFRAMES=10 TFDELAY=100 GPUTYPE=CPU $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh + ``` + Note that this uses 128 GB of SHM, runs only on the CPU, and processes the time frame 10 times in a loop with 100 s delay between the publishing. + - For a documentation of the options, see https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test.md + - For running on the GPU (4 GPUs with the HIP backend), please do + ``` + SHMSIZE=128000000000 NTIMEFRAMES=10 TFDELAY=10 GPUTYPE=HIP NGPUS=4 GPUMEMSIZE=22000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh + ``` +This will use 4 GPUs with the HIP backend and allocate 22 GB of scratch memory on the GPU (should be sufficient for 128 orbit TF). You can change the GPU type as indicated in the linked README.md above, e.g. `GPUTYPE=CUDA NGPUS=1` for 1 CUDA GPU. +1. With this, the full chain is running inside O2 DPL. Next we are adding DataDistribution. + 1. 
Create the TF files as explained in the subtask (https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/raw-data-simulation.md). For convenience, there is a script that should do it automatically, from a shell that has loaded both DataDistribution and Readout: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. + 1. Enter the O2 environment, and run the following script (please adjust the variables as in the test before). + ``` + EXTINPUT=1 SHMSIZE=128000000000 GPUTYPE=CPU $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh + ``` + - As a first optional test without DataDistribution, we can take the RawReader to feed the data in the way DataDistribution does. Run the following script in a second shell within the O2 environment. (Please adjust the variables as noted above) + ``` + SHMSIZE=128000000000 NTIMEFRAMES=10 TFDELAY=100 $O2_ROOT/prodtests/full-system-test/raw-reader.sh + ``` + 1. In a second shell with DataDistribution, run the following script (adjust the 2 variables for memory size as needed for your data, and set the TF_DIR variable to the folder where you recorded the time frame). Make sure you start this script ONLY AFTER the DPL workflow has fully started! There is no number of timeframes, it will run in an endless loop. + ``` + SHMSIZE=128000000000 DDSHMSIZE=32000 TFDELAY=100 $O2_ROOT/prodtests/full-system-test/datadistribution.sh + ``` +1. The full chain that will be running on the EPN farm is a bit more complicated. It consists of: + - 2 instances of the dpl-workflow driving 4 GPUs each, one per NUMA domain. + - 1 instance of data distribution feeding a shared input buffer. + The following script runs the full system test in the 8 GPU EPN configuration using tmux with 3 sessions: `TFDELAY=2.8457 NTIMEFRAMES=128 $O2_ROOT/prodtests/full-system-test/start-tmux.sh dd` + - Note that the number of GPUs / memory sizes are automatically set by start-tmux.sh. 
+ - This TFDELAY is the rate for processing 1/250th of 50 kHz Pb-Pb with average time frames. Since the occupancy of your simulated timeframe will fluctuate, it is suggested to scale the TFDELAY linearly with the number of tpc clusters (shown in the console output of the dpl-workflow), with the average corresponding to 2.8457 s being 313028012 clusters. + - You can for testing alternatively use the rawreader instead of datadistribution as input in the start_tmux.sh script by passing rr instead of dd. +1. On the EPN, an SHM management tool owns the memory in the background and keeps it locked. This is done in order to speed up the startup. This behavior can be reproduced in the full system test, by setting the env variable `SHM_MANAGER_SHMID` to the shm id to be used (must be set for both `start_tmux.sh` and `shm-tool.sh`; you can just use `SHM_MANAGER_SHMID=1` for a test) and running in a separate shell before starting `start_tmux.sh` + ``` + SHM_MANAGER_SHMID=1 SHMSIZE=$((128<<30)) DDSHMSIZE=$((128<<10)) $O2_ROOT/prodtests/full-system-test/shm-tool.sh + SHM_MANAGER_SHMID=1 TFDELAY=2.8457 NTIMEFRAMES=8 $O2_ROOT/prodtests/full-system-test/start-tmux.sh dd + ``` + +--- + +# Remarks for running with distortions: +1. To run the digitization with distortions, add the following to the digitizer command (using map inputSCDensity3D_8000_0 from file ../InputSCDensityHistograms_8000events.root): + ``` + --distortionType 2 --initialSpaceChargeDensity=../InputSCDensityHistograms_8000events.root,inputSCDensity3D_8000_0 + ``` +1. To rerun the digitization with the same BC sampling for the collisions add + ``` + --incontext collisioncontext.root + ``` +1. To create the tpc fast transform map from the SCD object run: + ``` + root -l -q -b ~/alice/O2/Detectors/TPC/reconstruction/macro/createTPCSpaceChargeCorrection.C++'("../InputSCDensityHistograms_8000events.root", "inputSCDensity3D_8000_0")' + ``` +1. 
In order to use the fast transform map for TPC tracking, add to the tpc-reco-workflow: + ``` + --configKeyValues "GPU_global.transformationFile=tpctransform.root" + ``` + +--- + +# Remarks for creating other prerequisite binary files: +1. To create the CTF dictionary: Run the full system test workflow once setting the env variable CREATECTFDICT=1: + ``` + CREATECTFDICT=1 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh + ``` +1. Create the ITS pattern dictionary + ``` + o2-its-reco-workflow --trackerCA --disable-mc --configKeyValues "fastMultConfig.cutMultClusLow=30000;fastMultConfig.cutMultClusHigh=2000000;fastMultConfig.cutMultVtxHigh=500" + root -b -q ~/alice/O2/Detectors/ITSMFT/ITS/macros/test/CheckTopologies.C++ + ``` + - Note that the ITS dictionary used for raw generation and for reconstruction must be the same. I.e., if you change this, you have to either restart from scratch with the new dictionary file or rerun the ITS raw generation part of `$O2_ROOT/prodtests/full_system_test.sh`. +1. To create the material lookup table + ``` + root -l -q -b $O2_ROOT/Detectors/Base/test/buildMatBudLUT.C + ``` +1. missing here: dedxsplines.root, tpcpadgaincalib.root + +--- + +# Measuring startup time: +- In order to measure the time for each individual GPU memory registration step, please add `CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.benchmarkMemoryRegistration=1;"`. This should show you 2 times ~2 seconds per GPU process for the 2 large segments (DD and the global segment, could also report some additional smaller segments, only 1 in case you don't use the readout proxy). +- In order to measure the total startup time, you can use the `start_tmux.sh` script with the option `FST_BENCHMARK_STARTUP=1`. It will print for both DPL chains 2 times at the beginning: The first is when it starts the workflow JSON generation, the second is after the JSON generation when the actual workflow is started. 
For the process startup time, you have to take the difference from that time until the time when the last process has reached the READY state. (Note that this should be done with the `$O2_ROOT/prodtests/full-system-test/shm-tool.sh` as instructed above.) + ``` + Fri Jan 28 11:25:48 CET 2022 + Fri Jan 28 11:25:56 CET 2022 + [...] + [1456583:gpu-reconstruction_t0]: [11:26:18][INFO] fair::mq::Device running... + ``` + - This corresponds to a JSON creation time of 8 seconds (will usually not count for the startup since it is cached), and a process startup time of 22 seconds. +--- + +# Other remarks: +1. To run with low b-field, add to o2-sim: + ``` + --field -2 + ``` +1. To create a sample of multiple TF files for StfBuilder, use the script `$O2_ROOT/prodtests/full-system-test/generate_timeframe_files.sh`. diff --git a/prodtests/full-system-test/README.md b/prodtests/full-system-test/documentation/full-system-test.md similarity index 95% rename from prodtests/full-system-test/README.md rename to prodtests/full-system-test/documentation/full-system-test.md index a52dfbc5d1203..80cc08baa2255 100644 --- a/prodtests/full-system-test/README.md +++ b/prodtests/full-system-test/documentation/full-system-test.md @@ -10,7 +10,7 @@ The full system test consists of 2 parts (detailed below): The relevant scripts are `/prodtests/full_system_test.sh` and all scripts in `/prodtests/full-system-test`. Note that by default the `full_system_test.sh` script will do both, run the generation and then the sysc and the async workflow. -This is only a quickstart guide, for more information see https://alice.its.cern.ch/jira/browse/O2-1492. +This is only a quickstart guide, for more information see https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md. 
In order to run the full system test, you need to run in the O2sim environment (`alienv enter O2sim/latest`): ``` @@ -50,7 +50,7 @@ The generation part (in `prodtests/full_system_test.sh` runs the following steps The `prodtests/full_system_test.sh` uses `Utilities/Tools/jobutils.sh` for running the jobs, which creates a log file for each step, and which will automatically skip steps that have already succeeded if the test is rerun in the current folder. I.e. if you break the FST or it failed at some point, you can rerun the same command line and it will continue after the last successful step. See `Utilities/Tools/jobutils.sh` for details. Note that by default, the generation produces raw files, which can be consumed by the `raw-file-reader-workflow` and by `o2-readout-exe`. -The files can be converted into timeframes files readable by the StfBuilder as described in https://alice.its.cern.ch/jira/browse/O2-1492. +The files can be converted into timeframes files readable by the StfBuilder as described in https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md. ## Full system test DPL-workflow configuration and scripts @@ -80,7 +80,7 @@ The `dpl-workflow.sh` can run both the synchronous and the asynchronous workflow All settings are configured via environment variables. The default settings (if no env variable is exported) are defined in `setenv.sh` which is sourced by all other scripts. (Please note that `start_tmux.sh` overrides a couple of options with EPN defaults). 
-The environment variables are documented here: https://github.com/AliceO2Group/O2DPG/blob/master/DATA/common/README.md +The environment variables are documented here: https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-env-variables.md ## Files produced / required by the full system test diff --git a/prodtests/full-system-test/documentation/raw-data-simulation.md b/prodtests/full-system-test/documentation/raw-data-simulation.md new file mode 100644 index 0000000000000..fbf6ace7d6934 --- /dev/null +++ b/prodtests/full-system-test/documentation/raw-data-simulation.md @@ -0,0 +1,43 @@ +This procedure will create (S)TF files from raw data prepared as described in the main ticket. The data must be using RDHv6. +Create configuration for the readout.exe with all input files we want in the TF. This will create rdo_TF.cfg file. + +  +``` +ulimit -n 4096 # Make sure we can open sufficiently many files cd raw# ls raw: ITS TPC TOF ... + +# copy gen_rdo_cfg.sh script attached here to the raw directory +# Run the script with number of HBF/TF and list directories you want to include in the TF + +~raw> ./gen_rdo_cfg.sh 128 TPC ITS TOF # ... others{code} +```  + +In a separate shell load a recent DataDistribution module and start StfBuilder to record the TF: +``` +export TF_PATH=$(pwd) +StfBuilder --id=stfb --detector-rdh=6 --detector-subspec=feeid --stand-alone --channel-config "name=readout,type=pull,method=connect,address=ipc:///tmp/readout-to-datadist-0,transport=shmem,rateLogging=1" --data-sink-dir=${TF_PATH} --data-sink-sidecar --data-sink-enable +``` + +Start the readout.exe (at least v1.4.3) using the generated config file. The dataflow will have a 10-20 seconds of delay, in order to have all input files loaded. 
+``` +ulimit -n 4096 # Make sure we can open sufficiently many files +~raw> readout.exe file:rdo_TF.cfg{code} +``` +  +Upon data transfer to StfBuilder, readout will print the stats, like: +``` +2020-06-23 18:07:59.003364 Last interval (1.00s): blocksRx=0, block rate=0.00, bytesRx=0, rate=0.000 b/s +2020-06-23 18:08:00.003382 Last interval (1.00s): blocksRx=2930, block rate=2930.00, bytesRx=1156508880, rate=9.252 Gb/s +2020-06-23 18:08:01.003384 Last interval (1.00s): blocksRx=0, block rate=0.00, bytesRx=0, rate=0.000 b/s{noformat} +``` + +StfBuilder will print one warning regarding the timeout on the last received TF. This can be ignored in this case. The log should look like : + +```  +{noformat}[2020-06-23 18:07:59.928][I] readout[0]: in: 1224 (1156.52 MB) out: 0 (0 MB) +[2020-06-23 18:08:01.733][W] READOUT INTERFACE: finishing STF on a timeout. stf_id=1 size=1156508880 +[2020-06-23 18:08:02.607][I] Sending STF out. stf_id=1 channel=standalone-chan[0] stf_size=1156508880 unique_equipments=1224{noformat} +``` + +After this, both processes can be closed with Ctrl-C. 
The resulting TFs are stored in a new directory under TF_PATH (the name of the dir is the time of running) + +  From 8ed4d1083b9403972662c8bcf0cec3a29487e244 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 22:55:03 +0200 Subject: [PATCH 0300/1764] GPU: Add documentation --- GPU/documentation/README.md | 0 GPU/documentation/build-O2.md | 62 +++++++++++++++++++ GPU/documentation/build-standalone.md | 86 +++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) create mode 100644 GPU/documentation/README.md create mode 100644 GPU/documentation/build-O2.md create mode 100644 GPU/documentation/build-standalone.md diff --git a/GPU/documentation/README.md b/GPU/documentation/README.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/GPU/documentation/build-O2.md b/GPU/documentation/build-O2.md new file mode 100644 index 0000000000000..809d1fe0d5439 --- /dev/null +++ b/GPU/documentation/build-O2.md @@ -0,0 +1,62 @@ +This ticket will serve as documentation how to enable which GPU features and collect related issues. + +So far, the following features exist: + * GPU Tracking with CUDA + * GPU Tracking with HIP + * GPU Tracking with OpenCL (>= 2.1) + * OpenGL visualization of the tracking + * ITS GPU tracking + +GPU support should be detected and enabled automatically. +If you just want to reproduce the GPU build locally without running it, it might be easiest to use the GPU CI container (see below). +The provisioning script of the container also demonstrates which patches need to be applied such that everything works correctly. + +*GPU Tracking with CUDA* + * The CMake option -DENABLE_CUDA=ON/OFF/AUTO steers whether CUDA is forced enabled / unconditionally disabled / auto-detected. + * The CMake option -DCUDA_COMPUTETARGET= fixes a GPU target, e.g. 61 for PASCAL or 75 for Turing (if unset, it compiles for the lowest supported architecture) + * CUDA is detected via the CMake language feature, so essentially nvcc must be in the Path. 
+ * We require CUDA version >= 11.2 + * CMake will report "Building GPUTracking with CUDA support" when enabled. + +*GPU Tracking with HIP* + * HIP and HCC must be installed, and CMake must be able to detect HIP via find_package(hip). + * If HIP and HCC are not installed to /opt/rocm, the environment variables $HIP_PATH and $HCC_HOME must point to the installation directories. + * HIP from ROCm >= 4.0 is required. + * The CMake option -DHIP_AMDGPUTARGET= forces a GPU target, e.g. gfx906 for Radeon VII (if unset, it auto-detects the GPU). + * CMake will report "Building GPUTracking with HIP support" when enabled. + * It may be that some patches must be applied to ROCm after the installation. You find the details in the provisioning script of the GPU CI container below. + +*GPU Tracking with OpenCL (Needs Clang >= 18 for compilation)* + * Needs OpenCL library with version >= 2.1, detectable via CMake find_package(OpenCL). + * Needs the SPIR-V LLVM translator together with LLVM to create the SPIR-V binaries, also detectable via CMake. + +*OpenGL visualization of TPC tracking* + * Needs the following libraries (all detectable via CMake find_package): libOpenGL, libGLEW, libGLFW, libGLU. + * OpenGL must be at least version 4.5, but this is not detectable at CMake time. If the supported OpenGL version is below, the display is not/partially built, and not available at runtime. (Whether it is not or partially built depends on whether the maximum OpenGL version supported by GLEW or that of the system runtime is insufficient.) + * Note: If ROOT does not detect the system GLEW library, ROOT will install its own very outdated GLEW library, which will be insufficient for the display. Since the ROOT include path will come first in the order, this will prevent the display from being built. + * CMake will report "Building GPU Event Display" when enabled. + +*Vulkan visualization* + * Similar to OpenGL visualization, but with Vulkan. 
+ +*ITS GPU Tracking* + * So far supports only CUDA and HIP, support for OpenCL might come. + * The build is enabled when the "GPU Tracking with CUDA" (as explained above) detects CUDA, same for HIP. + * CMake will report "Building ITS CUDA tracker" when enabled, same for HIP. + +*Using the GPU CI container* + * Setting up everything locally might be somewhat time-consuming, instead you can use the GPU CI docker container. + * The docker image is `alisw/slc8-gpu-builder`. + * The container exports the `ALIBUILD_O2_FORCE_GPU` env variable, which force-enables all GPU builds. + * Note that it might not be possible out-of-the-box to run the GPU version from within the container. In case of HIP it should work when you forward the necessary GPU devices in the container. For CUDA however, you would either need to (in addition to device forwarding) match the system CUDA driver and toolkit installation to the files present in the container, or you need to use the CUDA docker runtime, which is currently not installed in the container. + * There are currently some patches needed to install all the GPU backends in a proper way and together. Please refer to the container provisioning script https://github.com/alisw/docks/blob/master/slc9-gpu-builder/provision.sh. If you want to reproduce the installation locally, it is recommended to follow the steps from the script. + +*Summary* + +If you want to enforce the GPU builds on a system without GPU, please set the following CMake settings: + * ENABLE_CUDA=ON + * ENABLE_HIP=ON + * ENABLE_OPENCL=ON + * HIP_AMDGPUTARGET=gfx906;gfx908 + * CUDA_COMPUTETARGET=86 89 +Alternatively you can set the environment variables ALIBUILD_ENABLE_CUDA and ALIBUILD_ENABLE_HIP to enforce building CUDA or HIP without modifying the alidist scripts. 
diff --git a/GPU/documentation/build-standalone.md b/GPU/documentation/build-standalone.md new file mode 100644 index 0000000000000..d4e9da5cd5bf3 --- /dev/null +++ b/GPU/documentation/build-standalone.md @@ -0,0 +1,86 @@ +This ticket describes how to build the O2 GPU TPC Standalone benchmark (in its 2 build types), and how to run it. + +The purpose of the standalone benchmark is to make the O2 GPU TPC reconstruction code available standalone. It provides +- external tests when people do not have / want to build O2, have no access to alien for CCDB, etc. +- fast standalone tests without running O2 workflows and overhead from CCDB. +- faster build times than rebuilding O2 for development. + +# Compiling + +The standalone benchmark is built as part of O2, and it can be built standalone. + +As part of O2, it is available from the normal O2 build as the executable `o2-gpu-standalone-benchmark`, GPU support is available for all GPU types supported by the O2 build. + +Building it as standalone benchmark requires several dependencies, and provides more control which features to enable / disable. +The dependencies can be taken from the system, or we can use alidist to build O2 and take the dependencies from there. + +In order to do the latter, please execute: +``` +cd ~/alice # or your alice folder +aliBuild build --defaults o2 O2 +source O2/GPU/GPUTracking/Standalone/cmake/prepare.sh +``` + +Then, in order to compile the standalone tool, assuming to have it in ~/standalone and build in ~/standalone/build, please run: +``` +mkdir -p ~/standalone/build +cd ~/standalone/build +cmake -DCMAKE_INSTALL_PREFIX=../ ~/alice/O2/GPU/GPUTracking/Standalone/ +nano config.cmake # edit config file to enable / disable dependencies as needed. In case cmake failed, and you disabled the dependency, just rerun the above command. +make install -j32 +``` + +You can edit certain build settings in `config.cmake`. Some of them are identical to the GPU build settings for O2, as described in O2-786. 
+And there are plenty of additional settings to enable/disable event display, qa, usage of ROOT, FMT, etc. libraries. + +This will create the `ca` binary in `~/standalone`, which is basically the same as the `o2-gpu-standalone-benchmark`, but built outside of O2. + +# Running + +The following command lines will use `./ca`, in case you use the executable from the O2 build, please replace by `o2-gpu-standalone-benchmark`. + +You can get a list of command line options by `./ca --help` and `./ca --helpall`. + +In order to run, you need a dataset. See the next section for how to create a dataset. Datasets are stored in `~/standalone/events`, and are identified by their folder names. The following commands assume a testdataset of name `o2-pbpb-100`. + +To run on that data, the simplest command is `./ca -e o2-pbpb-100`. This will automatically use a GPU if available, trying all backends, otherwise fall back to CPU. +You can force using GPU or CPU with `-g` and `-c`. +You can select the backend via `--gpuType CUDA|HIP|OCL|OCL2`, and inside the backend you can select the device number, if multiple devices exist, via `--gpuDevice i`. + +The flag `--debug` (-2 to 6) enables increasingly extensive debug output, and `--debug 6` stores full data dumps of all intermediate steps to files. +>= `--debug 1` has a performance impact since it adds serialization points for debugging. For timing individual kernels, `--debug 1` prints timing information for all kernels. +An example line would e.g. 
be +``` +./ca -e o2-pbpb-100 -g --gpuType CUDA --gpuDevice 0 --debug 1 +``` + +Some other noteworthy options are `--display` to run the GPU event display, `--qa` to run a QA task on MC data, `--runs` and `--runs2` to run multiple iterations of the benchmark, `--printSettings` to print all the settings that were used, `--memoryStat` to print memory statistics, `--sync` to run with settings for online reco, `--syncAsync` to run online reco first, and then offline reco on the produced TPC CTF data, `--setO2Settings` to use some defaults as they are in O2 not in the standalone version, `--PROCdoublePipeline` to enable the double-threaded pipeline for best performance (works only with multiple iterations, and not in async mode), and `--RTCenable` to enable the run time compilation improvements (check also `--RTCcacheOutput`). +An example for a benchmark in online mode would be: +``` +./ca -e o2-pbpb-100 -g --sync --setO2Settings --PROCdoublePipeline --RTCenable --runs 10 +``` + +# Generating a dataset + +The standalone benchmark supports running on Run2 data exported from AliRoot, or to run on Run3 data from O2. This document covers only the O2 case. +In o2, `o2-tpc-reco-workflow` and the `o2-gpu-reco-workflow` can dump event data with the `configKeyValue` `GPU_global.dump=1;`. +This will dump the event data to the local folder, all dumped files have a `.dump` file extension. If there are multiple TFs/events processed, there will be multiple `event.i.dump` files. In order to create a standalone dataset out of these, just copy all the `.dump` files to a subfolder in `~/standalone/events/[FOLDERNAME]`. + +Data can be dumped from raw data, or from MC data, e.g. generated by the Full System Test. In case of MC data, also MC labels are dumped, such that they are used in the `./ca --qa` mode. + +To get a dump from simulated data, please run e.g. the FST simulation as described in O2-2633. 
+A simple run as +``` +DISABLE_PROCESSING=1 NEvents=5 NEventsQED=100 SHMSIZE=16000000000 $O2_ROOT/prodtests/full_system_test.sh +``` +should be enough. + +Afterwards run the following command to dump the data: +``` +SYNCMODE=1 CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.dump=1;" WORKFLOW_DETECTORS=TPC SHMSIZE=16000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh +``` + +To dump standalone data from CTF raw data in `myctf.root`, you can use the same script, e.g.: +``` +CTFINPUT=1 INPUT_FILE_LIST=myctf.root CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.dump=1;" WORKFLOW_DETECTORS=TPC SHMSIZE=16000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh +``` From 80a80a17f5a1d9cb77743e2a39b15b653fe1a4f9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 16:39:38 +0200 Subject: [PATCH 0301/1764] GPU: Make TPC CF CF_SCAN_WORKGROUP_SIZE configureable --- GPU/GPUTracking/Base/GPUProcessor.h | 18 ++++++++++++++--- .../Definitions/GPUDefParametersConstants.h | 2 -- .../Definitions/GPUDefParametersDefaults.h | 17 ++++++++++------ .../Global/GPUChainTrackingClusterizer.cxx | 14 +++++++------ .../GPUTPCCFStreamCompaction.cxx | 14 +++++++++++-- .../GPUTPCCFStreamCompaction.h | 14 ++++++------- .../TPCClusterFinder/GPUTPCClusterFinder.cxx | 20 +++++++++++-------- .../TPCClusterFinder/GPUTPCClusterFinder.h | 5 ++--- GPU/GPUTracking/kernels.cmake | 3 ++- 9 files changed, 69 insertions(+), 38 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUProcessor.h b/GPU/GPUTracking/Base/GPUProcessor.h index 2e0e0a003f87d..df551c9f0330d 100644 --- a/GPU/GPUTracking/Base/GPUProcessor.h +++ b/GPU/GPUTracking/Base/GPUProcessor.h @@ -63,7 +63,7 @@ class GPUProcessor } template - static inline size_t getAlignmentMod(size_t addr) + static constexpr inline size_t getAlignmentMod(size_t addr) { static_assert((alignment & (alignment - 1)) == 0, "Invalid alignment, not power of 2"); if (alignment <= 1) { @@ -72,7 +72,7 @@ class GPUProcessor return addr & 
(alignment - 1); } template - static inline size_t getAlignment(size_t addr) + static constexpr inline size_t getAlignment(size_t addr) { size_t mod = getAlignmentMod(addr); if (mod == 0) { @@ -81,10 +81,22 @@ class GPUProcessor return (alignment - mod); } template - static inline size_t nextMultipleOf(size_t size) + static constexpr inline size_t nextMultipleOf(size_t size) { return size + getAlignment(size); } + static constexpr inline size_t nextMultipleOf(size_t size, size_t alignment) + { + if (alignment & (alignment - 1)) { + size_t tmp = size % alignment; + if (tmp) { + size += alignment - tmp; + } + return size; + } else { + return (size + alignment - 1) & ~(alignment - 1); + } + } template static inline void* alignPointer(void* ptr) { diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h index dd4a5dcbe7ba8..78036e47fc49d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h @@ -18,8 +18,6 @@ #define GPUDEFPARAMETERSCONSTANTS_H // clang-format off -#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! - #if defined(__CUDACC__) || defined(__HIPCC__) #define GPUCA_SPECIALIZE_THRUST_SORTS // Not compiled with RTC, so must be compile-time constant #endif diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index cdc5efd56ddfd..7879789bf91c8 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -22,7 +22,6 @@ // GPU Run Configuration #if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
- #define GPUCA_LB_SCAN 512 // GPU-architecture-dependent default settings #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 @@ -499,11 +498,11 @@ #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_LB_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_LB_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_LB_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_LB_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER @@ -541,6 +540,9 @@ #ifndef GPUCA_PAR_COMP_GATHER_MODE #define GPUCA_PAR_COMP_GATHER_MODE 2 #endif + #ifndef GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE 512 + #endif #endif // defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) #ifndef GPUCA_GPUCODE_GENRTC @@ -578,6 +580,9 @@ #ifndef GPUCA_PAR_NO_ATOMIC_PRECHECK #define GPUCA_PAR_NO_ATOMIC_PRECHECK 0 #endif + #ifndef GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE 0 + #endif #ifndef GPUCA_PAR_DEDX_STORAGE_TYPE #define 
GPUCA_PAR_DEDX_STORAGE_TYPE float #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 630c2200e5900..f188388e76a02 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -23,6 +23,7 @@ #include "CfChargePos.h" #include "CfArray2D.h" #include "GPUGeneralKernels.h" +#include "GPUDefParametersRuntime.h" #include "GPUTPCCFStreamCompaction.h" #include "GPUTPCCFChargeMapFiller.h" #include "GPUTPCCFDecodeZS.h" @@ -402,27 +403,28 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust exit(1); } + int32_t scanWorkgroupSize = mRec->getGPUParameters(doGPU).par_CF_SCAN_WORKGROUP_SIZE; size_t tmpCount = count; if (nSteps > 1) { for (uint32_t i = 1; i < nSteps; i++) { counts.push_back(tmpCount); if (i == 1) { - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, stage); + runKernel({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, i, stage); } else { - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, tmpCount); + runKernel({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, i, tmpCount); } - tmpCount = (tmpCount + clusterer.mScanWorkGroupSize - 1) / clusterer.mScanWorkGroupSize; + tmpCount = (tmpCount + scanWorkgroupSize - 1) / scanWorkgroupSize; } - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, nSteps, tmpCount); + runKernel({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, nSteps, tmpCount); for (uint32_t i = nSteps - 1; i > 1; i--) { tmpCount = counts[i - 1]; - runKernel({GetGrid(tmpCount - clusterer.mScanWorkGroupSize, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, clusterer.mScanWorkGroupSize, tmpCount); + runKernel({GetGrid(tmpCount - scanWorkgroupSize, scanWorkgroupSize, lane), {iSector}}, i, scanWorkgroupSize, tmpCount); } } - runKernel({GetGrid(count, 
clusterer.mScanWorkGroupSize, lane), {iSector}}, 1, stage, in, out); + runKernel({GetGrid(count, scanWorkgroupSize, lane), {iSector}}, 1, stage, in, out); } else { auto& nOut = stage ? clusterer.mPmemory->counters.nClusters : clusterer.mPmemory->counters.nPeaks; auto& nIn = stage ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx index 1da5a1158a8c2..d43e96b19c5d0 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx @@ -24,6 +24,7 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage) { +#ifdef GPUCA_GPUCODE int32_t nElems = CompactionElems(clusterer, stage); const auto* predicate = clusterer.mPisPeak; @@ -35,17 +36,19 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread(smem, pred); + int32_t nElemsInBlock = CfUtils::blockPredicateSum(smem, pred); int32_t lastThread = nThreads - 1; if (iThread == lastThread) { scanOffset[iBlock] = nElemsInBlock; } +#endif } template <> GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems) { +#ifdef GPUCA_GPUCODE auto* scanOffset = clusterer.GetScanBuffer(iBuf - 1); auto* scanOffsetNext = clusterer.GetScanBuffer(iBuf); @@ -59,11 +62,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread if (iThread == lastThread) { scanOffsetNext[iBlock] = offsetInBlock; } +#endif } template <> GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t 
nElems) { +#ifdef GPUCA_GPUCODE int32_t iThreadGlobal = get_global_id(0); int32_t* scanOffset = clusterer.GetScanBuffer(iBuf - 1); @@ -74,11 +79,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& /*smem*/, processorType& clusterer, int32_t iBuf, uint32_t offset, int32_t nElems) { +#ifdef GPUCA_GPUCODE int32_t iThreadGlobal = get_global_id(0) + offset; int32_t* scanOffsetPrev = clusterer.GetScanBuffer(iBuf - 1); @@ -89,11 +96,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, CfChargePos* in, CfChargePos* out) { +#ifdef GPUCA_GPUCODE uint32_t nElems = CompactionElems(clusterer, stage); SizeT bufferSize = (stage) ? clusterer.mNMaxClusters : clusterer.mNMaxPeaks; @@ -105,7 +114,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread= nElems); int32_t pred = (iAmDummy) ? 
0 : predicate[iThreadGlobal]; - int32_t offsetInBlock = CfUtils::blockPredicateScan(smem, pred); + int32_t offsetInBlock = CfUtils::blockPredicateScan(smem, pred); SizeT globalOffsetOut = offsetInBlock; if (iBlock > 0) { @@ -129,6 +138,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Threadcounters.nPeaks = nFinal; } } +#endif } GPUdii() int32_t GPUTPCCFStreamCompaction::CompactionElems(processorType& clusterer, int32_t stage) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h index a72907fe55e89..a5ea8b24e9522 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h @@ -35,14 +35,14 @@ class GPUTPCCFStreamCompaction : public GPUKernelTemplate compactDigits = 4, }; - struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { - }; #if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart)); - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp)); - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop)); - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown)); - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits)); + struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { + }; + static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart)); + static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp)); + static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop)); + 
static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown)); + static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits)); #endif typedef GPUTPCClusterFinder processorType; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx index 051391f12cc6d..541edaa689c6c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx @@ -17,6 +17,7 @@ #include "GPUMemorySizeScalers.h" #include "GPUHostDataTypes.h" #include "GPUSettings.h" +#include "GPUDefParametersRuntime.h" #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/ZeroSuppression.h" @@ -90,9 +91,10 @@ void* GPUTPCClusterFinder::SetPointersScratch(void* mem) computePointerWithAlignment(mem, mPisPeak, mNMaxDigitsFragment); computePointerWithAlignment(mem, mPchargeMap, TPCMapMemoryLayout::items(mRec->GetProcessingSettings().overrideClusterizerFragmentLen)); computePointerWithAlignment(mem, mPpeakMap, TPCMapMemoryLayout::items(mRec->GetProcessingSettings().overrideClusterizerFragmentLen)); - computePointerWithAlignment(mem, mPbuf, mBufSize * mNBufs); computePointerWithAlignment(mem, mPclusterByRow, GPUCA_ROW_COUNT * mNMaxClusterPerRow); - + if ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding)) { + computePointerWithAlignment(mem, mPscanBuf, mBufSize * mNBufs); + } return mem; } @@ -129,14 +131,15 @@ void GPUTPCClusterFinder::SetMaxData(const GPUTrackingInOutPointers& io) if (mRec->GetProcessingSettings().tpcIncreasedMinClustersPerRow) { mNMaxClusterPerRow = std::max(mNMaxClusterPerRow, mRec->GetProcessingSettings().tpcIncreasedMinClustersPerRow); } - - mBufSize = nextMultipleOf(GPUCA_MEMALIGN, mScanWorkGroupSize)>(mNMaxDigitsFragment); - mNBufs = getNSteps(mBufSize); + if ((mRec->GetRecoStepsGPU() & 
GPUDataTypes::RecoStep::TPCClusterFinding)) { + mBufSize = nextMultipleOf(mNMaxDigitsFragment, std::max(GPUCA_MEMALIGN, mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE)); + mNBufs = getNSteps(mBufSize); + } } void GPUTPCClusterFinder::SetNMaxDigits(size_t nDigits, size_t nPages, size_t nDigitsFragment, size_t nDigitsEndpointMax) { - mNMaxDigits = nextMultipleOf(GPUCA_MEMALIGN, mScanWorkGroupSize)>(nDigits); + mNMaxDigits = nextMultipleOf(nDigits, std::max(GPUCA_MEMALIGN, mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE)); mNMaxPages = nPages; mNMaxDigitsFragment = nDigitsFragment; mNMaxDigitsEndpoint = nDigitsEndpointMax; @@ -148,9 +151,10 @@ uint32_t GPUTPCClusterFinder::getNSteps(size_t items) const return 0; } uint32_t c = 1; - size_t capacity = mScanWorkGroupSize; + const size_t scanWorkgroupSize = mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE; + size_t capacity = scanWorkgroupSize; while (items > capacity) { - capacity *= mScanWorkGroupSize; + capacity *= scanWorkgroupSize; c++; } return c; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index 96efe08be6dc6..37399f5e4863f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -111,10 +111,10 @@ class GPUTPCClusterFinder : public GPUProcessor uint32_t* mPclusterInRow = nullptr; tpc::ClusterNative* mPclusterByRow = nullptr; GPUTPCClusterMCInterimArray* mPlabelsByRow = nullptr; - int32_t* mPbuf = nullptr; + int32_t* mPscanBuf = nullptr; Memory* mPmemory = nullptr; - GPUdi() int32_t* GetScanBuffer(int32_t iBuf) const { return mPbuf + iBuf * mBufSize; } + GPUdi() int32_t* GetScanBuffer(int32_t iBuf) const { return mPscanBuf + iBuf * mBufSize; } 
o2::dataformats::ConstMCTruthContainerView const* mPinputLabels = nullptr; uint32_t* mPlabelsInRow = nullptr; @@ -122,7 +122,6 @@ class GPUTPCClusterFinder : public GPUProcessor uint32_t mPlabelsDataGlobalOffset = 0; int32_t mISector = 0; - constexpr static int32_t mScanWorkGroupSize = GPUCA_THREAD_COUNT_SCAN; uint32_t mNMaxClusterPerRow = 0; uint32_t mNMaxClusters = 0; uint32_t mNMaxPages = 0; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 937a92fef33df..08d879fbb8e9a 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -146,7 +146,8 @@ o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP NO_ATOMIC_PRECHECK COMP_GATHER_KERNEL COMP_GATHER_MODE - SORT_STARTHITS) + SORT_STARTHITS + CF_SCAN_WORKGROUP_SIZE) o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE MERGER_INTERPOLATION_ERROR_TYPE) From f1e0a1298ca6a1ba2bea97e4e8b403487dbfa13f Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Fri, 25 Apr 2025 10:14:07 +0200 Subject: [PATCH 0302/1764] Update CODEOWNERS for ALICE3 (#14221) @njacazio --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/CODEOWNERS b/CODEOWNERS index a22b122d0e6cd..5337622522bbb 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -73,6 +73,7 @@ /Detectors/TPC @davidrohr @wiechula @shahor02 /Detectors/TRD @f3sch @bazinski @wille10 /Detectors/Upgrades @mconcas +/Detectors/Upgrades/ALICE3 @mconcas @njacazio /Detectors/Upgrades/ITS3 @fgrosa @arossi81 @mconcas @f3sch /Detectors/ZDC @coppedis /Detectors/CTF @shahor02 From fc3ace17eca580c338751163ef4528e3ec47f9d6 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Fri, 25 Apr 2025 10:14:46 +0200 Subject: [PATCH 0303/1764] ALICE3-TRK: Add skeleton for the TRK reconstruction WF (#14222) --------- Co-authored-by: ALICE Builder --- .../ALICE3/TRK/workflow/CMakeLists.txt | 22 ++-- .../include/TRKWorkflow/RecoWorkflow.h | 33 +++++ .../include/TRKWorkflow/TrackerSpec.h | 56 +++++++++ .../ALICE3/TRK/workflow/src/RecoWorkflow.cxx | 31 +++++ 
.../ALICE3/TRK/workflow/src/TrackerSpec.cxx | 116 ++++++++++++++++++ .../TRK/workflow/src/trk-reco-workflow.cxx | 80 ++++++++++++ 6 files changed, 326 insertions(+), 12 deletions(-) create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/RecoWorkflow.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/TrackerSpec.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/RecoWorkflow.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/TrackerSpec.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/trk-reco-workflow.cxx diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt index c9f4099017717..e86ed7982c85b 100644 --- a/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt @@ -10,23 +10,21 @@ # or submit itself to any jurisdiction. o2_add_library(TRKWorkflow + TARGETVARNAME targetName SOURCES src/DigitReaderSpec.cxx src/DigitWriterSpec.cxx - # src/RecoWorkflow.cxx - # src/ClusterWriterWorkflow.cxx - # src/ClustererSpec.cxx - # src/ClusterWriterSpec.cxx - # src/TrackerSpec.cxx - # src/TrackWriterSpec.cxx - # src/TrackReaderSpec.cxx - # src/VertexReaderSpec.cxx + src/TrackerSpec.cxx + src/RecoWorkflow.cxx PUBLIC_LINK_LIBRARIES O2::Framework + O2::GPUWorkflow O2::SimConfig O2::DataFormatsITSMFT O2::SimulationDataFormat O2::DPLUtils) -# o2_add_executable(reco-workflow -# SOURCES src/trk-reco-workflow.cxx -# COMPONENT_NAME alice3-trk -# PUBLIC_LINK_LIBRARIES O2::TRKWorkflow) \ No newline at end of file +o2_add_executable(reco-workflow + SOURCES src/trk-reco-workflow.cxx + COMPONENT_NAME alice3-trk + PUBLIC_LINK_LIBRARIES O2::TRKWorkflow + O2::TRKSimulation + O2::ITStracking) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/RecoWorkflow.h b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/RecoWorkflow.h 
new file mode 100644 index 0000000000000..0c2489aa4b9c4 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/RecoWorkflow.h @@ -0,0 +1,33 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef O2_TRK_RECOWORKFLOW_H +#define O2_TRK_RECOWORKFLOW_H + +#include "Framework/WorkflowSpec.h" +#include "GPUDataTypes.h" + +namespace o2::trk +{ +namespace reco_workflow +{ + +o2::framework::WorkflowSpec getWorkflow(bool useMC, + bool upstreamDigits = false, + bool upstreamClusters = false, + bool disableRootOutput = false, + bool useGPUWF = false, + o2::gpu::GPUDataTypes::DeviceType dType = o2::gpu::GPUDataTypes::DeviceType::CPU); +} + +} // namespace o2::trk + +#endif diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/TrackerSpec.h b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/TrackerSpec.h new file mode 100644 index 0000000000000..3c82a4fd7b89d --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/TrackerSpec.h @@ -0,0 +1,56 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// @file TrackerSpec.h + +#ifndef O2_TRK_TRACKERDPL +#define O2_TRK_TRACKERDPL + +#include "DataFormatsITSMFT/TopologyDictionary.h" + +#include "Framework/DataProcessorSpec.h" +#include "Framework/Task.h" + +#include "ITStracking/TrackingInterface.h" +#include "GPUDataTypes.h" + +#include "DetectorsBase/GRPGeomHelper.h" + +#include "TStopwatch.h" + +namespace o2::trk +{ +class TrackerDPL : public framework::Task +{ + public: + TrackerDPL(std::shared_ptr gr, + bool isMC, + gpu::GPUDataTypes::DeviceType dType = gpu::GPUDataTypes::DeviceType::CPU); + ~TrackerDPL() override = default; + void init(framework::InitContext& ic) final; + void run(framework::ProcessingContext& pc) final; + void endOfStream(framework::EndOfStreamContext& ec) final; + // void finaliseCCDB(framework::ConcreteDataMatcher& matcher, void* obj) final; + void stop() final; + + private: + void updateTimeDependentParams(framework::ProcessingContext& pc); + // std::unique_ptr mRecChain = nullptr; + // std::unique_ptr mChainITS = nullptr; + // std::shared_ptr mGGCCDBRequest; + // ITSTrackingInterface mITSTrackingInterface; + TStopwatch mTimer; +}; + +framework::DataProcessorSpec getTrackerSpec(bool useMC, gpu::GPUDataTypes::DeviceType dType = gpu::GPUDataTypes::DeviceType::CPU); + +} // namespace o2::trk +#endif /* O2_TRK_TRACKERDPL */ \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/RecoWorkflow.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/RecoWorkflow.cxx new file mode 100644 index 0000000000000..3b2b44729b259 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/src/RecoWorkflow.cxx @@ -0,0 +1,31 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "TRKWorkflow/RecoWorkflow.h" +#include "TRKWorkflow/TrackerSpec.h" +#include "Framework/CCDBParamSpec.h" + +namespace o2::trk::reco_workflow +{ + +framework::WorkflowSpec getWorkflow(bool useMC, + bool upstreamDigits, + bool upstreamClusters, + bool disableRootOutput, + bool useGPUWF, + o2::gpu::GPUDataTypes::DeviceType dtype) +{ + framework::WorkflowSpec specs; + specs.emplace_back(o2::trk::getTrackerSpec(useMC, dtype)); + return specs; +} + +} // namespace o2::trk::reco_workflow \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/TrackerSpec.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/TrackerSpec.cxx new file mode 100644 index 0000000000000..4057bab3b948f --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/src/TrackerSpec.cxx @@ -0,0 +1,116 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include + +#include "Framework/ControlService.h" +#include "Framework/ConfigParamRegistry.h" +#include "Framework/CCDBParamSpec.h" +#include "TRKWorkflow/TrackerSpec.h" + +namespace o2 +{ +using namespace framework; +namespace trk +{ +using Vertex = o2::dataformats::Vertex>; + +TrackerDPL::TrackerDPL(std::shared_ptr gr, + bool isMC, + o2::gpu::GPUDataTypes::DeviceType dType) +{ + // mITSTrackingInterface.setTrackingMode(trMode); +} + +void TrackerDPL::init(InitContext& ic) +{ + // mTimer.Stop(); + // mTimer.Reset(); + // o2::base::GRPGeomHelper::instance().setRequest(mGGCCDBRequest); + // mChainITS.reset(mRecChain->AddChain()); + // mITSTrackingInterface.setTraitsFromProvider(mChainITS->GetITSVertexerTraits(), + // mChainITS->GetITSTrackerTraits(), + // mChainITS->GetITSTimeframe()); +} + +void TrackerDPL::stop() +{ + LOGF(info, "CPU Reconstruction total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); +} + +void TrackerDPL::run(ProcessingContext& pc) +{ + auto cput = mTimer.CpuTime(); + auto realt = mTimer.RealTime(); + mTimer.Start(false); + // mITSTrackingInterface.updateTimeDependentParams(pc); + // mITSTrackingInterface.run(pc); + mTimer.Stop(); + LOGP(info, "CPU Reconstruction time for this TF {} s (cpu), {} s (wall)", mTimer.CpuTime() - cput, mTimer.RealTime() - realt); +} + +// void TrackerDPL::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) +// { +// // mITSTrackingInterface.finaliseCCDB(matcher, obj); +// } + +void TrackerDPL::endOfStream(EndOfStreamContext& ec) +{ + LOGF(info, "TRK CA-Tracker total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); +} + +DataProcessorSpec getTrackerSpec(bool useMC, o2::gpu::GPUDataTypes::DeviceType dType) +{ + std::vector inputs; + + // inputs.emplace_back("compClusters", "TRK", "COMPCLUSTERS", 0, Lifetime::Timeframe); + // inputs.emplace_back("patterns", "TRK", "PATTERNS", 0, Lifetime::Timeframe); + // 
inputs.emplace_back("ROframes", "TRK", "CLUSTERSROF", 0, Lifetime::Timeframe); + + // inputs.emplace_back("itscldict", "TRK", "CLUSDICT", 0, Lifetime::Condition, ccdbParamSpec("ITS/Calib/ClusterDictionary")); + // inputs.emplace_back("itsalppar", "TRK", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); + auto ggRequest = std::make_shared(false, // orbitResetTime + false, // GRPECS=true + false, // GRPLHCIF + false, // GRPMagField + false, // askMatLUT + o2::base::GRPGeomRequest::None, // geometry, but ignored until it will be put in the CCDB + inputs, + true); + std::vector outputs; + outputs.emplace_back("TRK", "TRACKS", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "TRACKCLSID", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "TRKTrackROF", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "VERTICES", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "VERTICESROF", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "IRFRAMES", 0, Lifetime::Timeframe); + + if (useMC) { + // inputs.emplace_back("trkmclabels", "TRK", "CLUSTERSMCTR", 0, Lifetime::Timeframe); + // inputs.emplace_back("TRKMC2ROframes", "TRK", "CLUSTERSMC2ROF", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "VERTICESMCTR", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "VERTICESMCPUR", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "TRACKSMCTR", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "TRKTrackMC2ROF", 0, Lifetime::Timeframe); + } + + return DataProcessorSpec{ + "trk-tracker", + inputs, + outputs, + AlgorithmSpec{adaptFromTask(ggRequest, + useMC, + dType)}, + Options{}}; +} + +} // namespace trk +} // namespace o2 diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/trk-reco-workflow.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/trk-reco-workflow.cxx new file mode 100644 index 0000000000000..0f75d42710400 --- /dev/null +++ 
b/Detectors/Upgrades/ALICE3/TRK/workflow/src/trk-reco-workflow.cxx @@ -0,0 +1,80 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "TRKWorkflow/RecoWorkflow.h" +#include "CommonUtils/ConfigurableParam.h" +#include "ITStracking/TrackingConfigParam.h" +#include "ITStracking/Configuration.h" + +#include "Framework/CallbacksPolicy.h" +#include "Framework/ConfigContext.h" +#include "Framework/CompletionPolicyHelpers.h" + +#include + +using namespace o2::framework; + +void customize(std::vector& policies) +{ + // o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies); +} + +void customize(std::vector& policies) +{ + // ordered policies for the writers + policies.push_back(CompletionPolicyHelpers::consumeWhenAllOrdered(".*(?:TRK|trk).*[W,w]riter.*")); +} + +void customize(std::vector& workflowOptions) +{ + // option allowing to set parameters + std::vector options{ + {"digits-from-upstream", VariantType::Bool, false, {"digits will be provided from upstream, skip digits reader"}}, + {"clusters-from-upstream", VariantType::Bool, false, {"clusters will be provided from upstream, skip clusterizer"}}, + {"disable-root-output", VariantType::Bool, false, {"do not write output root files"}}, + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"disable-tracking", VariantType::Bool, false, {"disable tracking step"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}, + {"use-gpu-workflow", VariantType::Bool, false, {"use GPU workflow (default: false)"}}, + {"gpu-device", VariantType::Int, 1, {"use gpu device: CPU=1,CUDA=2,HIP=3 (default: CPU)"}}}; + std::swap(workflowOptions, options); +} + +#include "Framework/runDataProcessing.h" +#include "Framework/Logger.h" + +WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) +{ + // Update the (declared) parameters if changed from the command line + auto useMC = !configcontext.options().get("disable-mc"); + auto useGpuWF = configcontext.options().get("use-gpu-workflow"); + auto gpuDevice = static_cast(configcontext.options().get("gpu-device")); + auto 
extDigits = configcontext.options().get("digits-from-upstream"); + auto extClusters = configcontext.options().get("clusters-from-upstream"); + auto disableRootOutput = configcontext.options().get("disable-root-output"); + o2::conf::ConfigurableParam::updateFromString(configcontext.options().get("configKeyValues")); + + // write the configuration used for the reco workflow + o2::conf::ConfigurableParam::writeINI("o2itsrecoflow_configuration.ini"); + + return o2::trk::reco_workflow::getWorkflow(useMC, extDigits, extClusters, disableRootOutput, useGpuWF, gpuDevice); +} From ec8cf07940d0e6faedce2cb9815bd2043a5a7028 Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Tue, 22 Apr 2025 16:25:33 +0200 Subject: [PATCH 0304/1764] Fix energy values for special runs --- Generators/share/egconfig/pythia8_NeNe.cfg | 8 ++++---- Generators/share/egconfig/pythia8_OO.cfg | 2 +- Generators/share/egconfig/pythia8_pO.cfg | 8 +++++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Generators/share/egconfig/pythia8_NeNe.cfg b/Generators/share/egconfig/pythia8_NeNe.cfg index fff1dbb5f3d59..75a77236f5b4e 100644 --- a/Generators/share/egconfig/pythia8_NeNe.cfg +++ b/Generators/share/egconfig/pythia8_NeNe.cfg @@ -1,8 +1,8 @@ ### beams Beams:idA 1000100200 # Neon -Beams:idB 1000100200 # Neon -Beams:eCM 10720. # GeV +Beams:idB 1000100200 # Neon +Beams:eCM 5360. # GeV ### decays -ParticleDecays:limitTau0 on -ParticleDecays:tau0Max 10. +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. diff --git a/Generators/share/egconfig/pythia8_OO.cfg b/Generators/share/egconfig/pythia8_OO.cfg index ff098e6b65135..9a4419309a800 100644 --- a/Generators/share/egconfig/pythia8_OO.cfg +++ b/Generators/share/egconfig/pythia8_OO.cfg @@ -1,7 +1,7 @@ ### beams Beams:idA 1000080160 # Oxygen Beams:idB 1000080160 # Oxygen -Beams:eCM 10720. # GeV +Beams:eCM 5360. 
# GeV ### decays ParticleDecays:limitTau0 on diff --git a/Generators/share/egconfig/pythia8_pO.cfg b/Generators/share/egconfig/pythia8_pO.cfg index aff9d3337cd9d..64efc6954b363 100644 --- a/Generators/share/egconfig/pythia8_pO.cfg +++ b/Generators/share/egconfig/pythia8_pO.cfg @@ -1,8 +1,10 @@ ### beams +Beams:frameType 2 # back-to-back beams of different energies and particles Beams:idA 2212 # proton Beams:idB 1000080160 # Oxygen -Beams:eCM 13600. # GeV +Beams:eA 6800. # Energy of proton beam in GeV moving in the +z direction +Beams:eB 3400. # Energy in GeV per Oxygen nucleon (6.8 Z TeV) moving in the -z direction ### decays -ParticleDecays:limitTau0 on -ParticleDecays:tau0Max 10. +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. \ No newline at end of file From 2a11afc3af82fde89b936c0bb86648326e34a08a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 25 Apr 2025 10:17:29 +0200 Subject: [PATCH 0305/1764] Improve / Add GPU documentation --- GPU/documentation/README.md | 13 ++++++++ GPU/documentation/build-O2.md | 24 +++++++------- GPU/documentation/build-standalone.md | 4 +-- GPU/documentation/deterministic-mode.md | 31 +++++++++++++++++++ GPU/documentation/run-time-compilation.md | 21 +++++++++++++ .../full-system-test/documentation/README.md | 17 ++++++++++ .../documentation/env-variables.md | 4 +-- .../full-system-test-as-stress-test.md | 2 +- .../documentation/full-system-test-setup.md | 4 +-- ...ata-simulation.md => raw-tf-conversion.md} | 0 10 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 GPU/documentation/deterministic-mode.md create mode 100644 GPU/documentation/run-time-compilation.md create mode 100644 prodtests/full-system-test/documentation/README.md rename prodtests/full-system-test/documentation/{raw-data-simulation.md => raw-tf-conversion.md} (100%) diff --git a/GPU/documentation/README.md b/GPU/documentation/README.md index e69de29bb2d1d..de888ab6e2436 100644 --- a/GPU/documentation/README.md +++ 
b/GPU/documentation/README.md @@ -0,0 +1,13 @@ +[build-O2.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build-O2.md) : +- Instructions how to build O2 with GPU support. +- Description of the CMake variables used. + +[build-standalone.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build-standalone.md) : +- Instructions how to build and run the standalone benchmark. +- Instructions how to extract data sets for the standalone benchmark from real data or using simulation. + +[deterministic-mode.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/deterministic-mode.md) : +- Instructions how to use the deterministic mode for both the standalone benchmark and O2. + +[run-time-compilation.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/run-time-compilation.md) : +- Instructions how to use run time compilation (RTC) for the GPU code. diff --git a/GPU/documentation/build-O2.md b/GPU/documentation/build-O2.md index 809d1fe0d5439..098629f45a832 100644 --- a/GPU/documentation/build-O2.md +++ b/GPU/documentation/build-O2.md @@ -12,17 +12,17 @@ If you just want to reproduce the GPU build locally without running it, it might The provisioning script of the container also demonstrates which patches need to be applied such that everything works correctly. *GPU Tracking with CUDA* - * The CMake option -DENABLE_CUDA=ON/OFF/AUTO steers whether CUDA is forced enabled / unconditionally disabled / auto-detected. - * The CMake option -DCUDA_COMPUTETARGET= fixes a GPU target, e.g. 61 for PASCAL or 75 for Turing (if unset, it compiles for the lowest supported architecture) + * The CMake option `-DENABLE_CUDA=ON/OFF/AUTO` steers whether CUDA is forced enabled / unconditionally disabled / auto-detected. + * The CMake option `-DCUDA_COMPUTETARGET=...` fixes a GPU target, e.g. 
61 for PASCAL or 75 for Turing (if unset, it compiles for the lowest supported architecture) * CUDA is detected via the CMake language feature, so essentially nvcc must be in the Path. - * We require CUDA version >= 11.2 + * We require CUDA version >= 12.8 * CMake will report "Building GPUTracking with CUDA support" when enabled. *GPU Tracking with HIP* * HIP and HCC must be installed, and CMake must be able to detect HIP via find_package(hip). - * If HIP and HCC are not installed to /opt/rocm, the environment variables $HIP_PATH and $HCC_HOME must point to the installation directories. + * If HIP and HCC are not installed to /opt/rocm, the environment variables `$HIP_PATH` and `$HCC_HOME` must point to the installation directories. * HIP from ROCm >= 4.0 is required. - * The CMake option -DHIP_AMDGPUTARGET= forces a GPU target, e.g. gfx906 for Radeon VII (if unset, it auto-detects the GPU). + * The CMake option `-DHIP_AMDGPUTARGET=...` forces a GPU target, e.g. gfx906 for Radeon VII (if unset, it auto-detects the GPU). * CMake will report "Building GPUTracking with HIP support" when enabled. * It may be that some patches must be applied to ROCm after the installation. You find the details in the provisioning script of the GPU CI container below. @@ -49,14 +49,14 @@ The provisioning script of the container also demonstrates which patches need to * The docker images is `alisw/slc8-gpu-builder`. * The container exports the `ALIBUILD_O2_FORCE_GPU` env variable, which force-enables all GPU builds. * Note that it might not be possible out-of-the-box to run the GPU version from within the container. In case of HIP it should work when you forwards the necessary GPU devices in the container. For CUDA however, you would either need to (in addition to device forwarding) match the system CUDA driver and toolkit installation to the files present in the container, or you need to use the CUDA docker runtime, which is currently not installed in the container. 
- * There are currently some patches needed to install all the GPU backends in a proper way and together. Please refer to the container provisioning script https://github.com/alisw/docks/blob/master/slc9-gpu-builder/provision.sh. If you want to reproduce the installation locally, it is recommended to follow the steps from the script. + * There are currently some patches needed to install all the GPU backends in a proper way and together. Please refer to the container provisioning script [provision.sh](https://github.com/alisw/docks/blob/master/slc9-gpu-builder/provision.sh). If you want to reproduce the installation locally, it is recommended to follow the steps from the script. *Summary* If you want to enforce the GPU builds on a system without GPU, please set the following CMake settings: - * ENABLE_CUDA=ON - * ENABLE_HIP=ON - * ENABLE_OPENCL=ON - * HIP_AMDGPUTARGET=gfx906;gfx908 - * CUDA_COMPUTETARGET=86 89 -Alternatively you can set the environment variables ALIBUILD_ENABLE_CUDA and ALIBUILD_ENABLE_HIP to enforce building CUDA or HIP without modifying the alidist scripts. + * `ENABLE_CUDA=ON` + * `ENABLE_HIP=ON` + * `ENABLE_OPENCL=ON` + * `HIP_AMDGPUTARGET=default` + * `CUDA_COMPUTETARGET=default` +Alternatively you can set the environment variables `ALIBUILD_ENABLE_CUDA=1` and `ALIBUILD_ENABLE_HIP=1` to enforce building CUDA or HIP without modifying the alidist scripts. diff --git a/GPU/documentation/build-standalone.md b/GPU/documentation/build-standalone.md index d4e9da5cd5bf3..891d16b4dc2c4 100644 --- a/GPU/documentation/build-standalone.md +++ b/GPU/documentation/build-standalone.md @@ -30,7 +30,7 @@ nano config.cmake # edit config file to enable / disable dependencies as needed. make install -j32 ``` -You can edit certain build settings in `config.cmake`. Some of them are identical to the GPU build settings for O2, as described in O2-786. +You can edit certain build settings in `config.cmake`.
Some of them are identical to the GPU build settings for O2, as described in [build-O2.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build-O2.md). And there are plenty of additional settings to enable/disable event display, qa, usage of ROOT, FMT, etc. libraries. This will create the `ca` binary in `~/standalone`, which is basically the same as the `o2-gpu-standalone-benchmark`, but built outside of O2. @@ -68,7 +68,7 @@ This will dump the event data to the local folder, all dumped files have a `.dum Data can be dumped from raw data, or from MC data, e.g. generated by the Full System Test. In case of MC data, also MC labels are dumped, such that they are used in the `./ca --qa` mode. -To get a dump from simulated data, please run e.g. the FST simulation as described in O2-2633. +To get a dump from simulated data, please run e.g. the FST simulation as described in [full-system-test-setup.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md). A simple run as ``` DISABLE_PROCESSING=1 NEvents=5 NEventsQED=100 SHMSIZE=16000000000 $O2_ROOT/prodtests/full_system_test.sh diff --git a/GPU/documentation/deterministic-mode.md b/GPU/documentation/deterministic-mode.md new file mode 100644 index 0000000000000..9c8db2930ceaa --- /dev/null +++ b/GPU/documentation/deterministic-mode.md @@ -0,0 +1,31 @@ +The TPC tracking code is not fully deterministic, i.e. running multiple times on the same data set might yield a slightly different number of tracks on the O(per mille) level. +- This comes from concurrency, i.e. when tracks are processed in parallel, the output order might change, which might have small effects on the consecutive steps. +- Also compile options and optimizations play a role, e.g. using ffast-math or fused-multiply-add might slightly change the rounding of floating point, and in rare cases lead to the acceptance or rejection of a track, and thus a different number of tracks.
+ +For debugging, testing, and validation, a deterministic mode is implemented, which should yield 100% reproducible results, on CPU and on GPU and when running multiple times. +It uses a combination of +- Compile time options, e.g. disabling all optimizations that change floating point rounding. +- Run time options, e.g. to use deterministic sorting, and add additional sorting steps after kernels to make the output deterministic, also intermediate outputs. + +This is steered by 3 options: +- The `-DGPUCA_DETERMINISTIC_MODE` Cmake setting : Compile-time setting. +- The `--PROCdeterministicGPUReconstruction` command line option / `GPU_proc.deterministicGPUReconstruction` `--configKeyValue` setting : Run time setting. +- The `--RTCdeterministic` command line option / `GPU_proc_rtc.deterministic` `--configKeyValue` setting. (Auto-enabled by the `deterministicGPUReconstruction` setting.) : Compile-time setting for RTC code. + +In order to be fully deterministic, all settings must be enabled, where the RTC setting is automatically enabled if not explicitly disabled. + +`GPUCA_DETERMINISTIC_MODE` has multiple levels, which are described here: [FindO2GPU.cmake](https://github.com/AliceO2Group/AliceO2/blob/80a80a17f5a1d9cb77743e2a39b15b653fe1a4f9/dependencies/FindO2GPU.cmake#L72). +- In order to have fully deterministic GPUReconstruction (i.e. all algorithms that come with the GPUTracking library, like TPC tracking), the level `GPUCA_DETERMINISTIC_MODE=GPU` is needed. +- In order to apply it to all of O2, e.g. for ITS tracking, please use `GPUCA_DETERMINISTIC_MODE=WHOLEO2` + +Enabling the options is a bit different for O2 and for the standalone benchmark: +- For enabling it in the standalone benchmark, please set GPUCA_DETERMINISTIC_MODE=GPU in [config.cmake](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/GPUTracking/Standalone/cmake/config.cmake) and use the command line argument `--PROCdeterministicGPUReconstruction 1`. 
+- For O2, either add `set(GPUCA_DETERMINISTIC_MODE GPU)` to the beginning of the [GPU CMakeLists.txt](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/CMakeLists.txt) or add `set(GPUCA_DETERMINISTIC_MODE WHOLEO2)` to the beginning of the [Global CMakeLists.txt](https://github.com/AliceO2Group/AliceO2/blob/dev/CMakeLists.txt), and use the `configKeyValue` `GPU_proc.deterministicGPUReconstruction`. In order to enable this for the Full-System-Test or with [dpl-workflow.sh](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/dpl-workflow.sh), please export `CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow=GPU_proc.deterministicGPUReconstruction=1;`. + +With these settings, if one runs multiple times, the number of clusters and number of tracks should be always fully identical. +Note that this yields a significant performance penalty during the processing, therefore the deterministic mode is not compiled in by default, but it must be enabled explicitly and code must be recompiled. + +Beyond comparing only the number of clusters and number of tracks, it is also possible to compare intermediate results. To do so, please use the standalone benchmark (either `./ca` or `o2-gpu-standalone-benchmark` binary) with the `--debug 6` option. +It will create a dump containing all (most) intermediate results in text form, which can be compared. The output file is called `CPU.out` if using the CPU backend, and `GPU.out` for the GPU backend. +Note that the dump files will be huge and the processing will be slow and consume much more memory than normal with `--debug 6`. It has been tested with datasets containing up to 50 Pb-Pb collisions, and might fail for larger data. +If the deterministic mode is used with both compile- and runtime-activation, the dump files should be 100% identical and can just be compared with `diff`.
diff --git a/GPU/documentation/run-time-compilation.md b/GPU/documentation/run-time-compilation.md new file mode 100644 index 0000000000000..accfceb47b870 --- /dev/null +++ b/GPU/documentation/run-time-compilation.md @@ -0,0 +1,21 @@ +Run time compilation is a feature of the GPUReconstruction library, which can recompile the GPU code for HIP and for CUDA at runtime, and apply some optimizations and changes. It is planned to add support for CPU code and OpenCL code in the future. + +The changes that can be applied are: +- `constexpr` optimization: configuration values that are constant during the processing are replaced by `constexpr` expressions, which allows the compiler to optimize the code better. Benchmarks in 2024 have shown 5% performance improvement with CUDA and 2% improvement with HIP. +- Disabling of unused code, in particular this is currently used to remove the TPC code for V/M shape correction during online processing, simplifying the code, and yielding better compiler optimization, for a 20%-30% speedup on the MI50 GPUs. +- Use different GPU constant parameters / launch bounds: These are tuning parameters, which are architecture-dependent. The default values are taken from the first architecture the GPU code is compiled for in the normal compilation phase. If the architecture we are running on is different, different parameters can be loaded for RTC. +- Compiling for different target architectures. This allows us to enable running on hardware, for which the code was not compiled in the original compilation. + +Generally, RTC is enabled via the `--RTCenable` flag for the standalone benchmark, or via the `GPU_proc_rtc.enable=1` `configKeyValue` for O2. +For a list of RTC options, please see [GPUSettingsList.h](https://github.com/AliceO2Group/AliceO2/blob/80a80a17f5a1d9cb77743e2a39b15b653fe1a4f9/GPU/GPUTracking/Definitions/GPUSettingsList.h#L215).
+ +Caching the output: +- The RTC output can be cached and reused, so that when running multiple times, compilation is not repeated. This is enabled via the `--RTCcacheOutput` setting. The folder to store the cache files can be selected via `--RTCTECHcacheFolder` and with `--RTCTECHcacheMutex` (default: enabled), a file-lock mutex can be used to synchronize access to the cache folder. The cached code is checked against the to-be-compiled source code with SHA1 hashes, and only if the code is not changed the cache is used, otherwise the code is recompiled and the cache updated. It is possible to force using outdated cache files via the `--RTCTECHignoreCacheValid` option. + +For changing the launch bounds and other parameters, please consider `--RTCTECHloadLaunchBoundsFromFile` (and `--RTCTECHprintLaunchBounds`), which can load a parameter set which can be created via [dumpGPUDefParam.C](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C). A set of default parameters is stored in `[INSTALL_FOLDER]/share/GPU`. + +It is possible to select a different target architecture for the compilation via `--RTCTECHoverrideArchitecture`, and the compilation can be prepended by a command with `--RTCTECHprependCommand`, e.g. for CPU pinning. See for example [dpl-workflow.sh](https://github.com/AliceO2Group/AliceO2/blob/80a80a17f5a1d9cb77743e2a39b15b653fe1a4f9/prodtests/full-system-test/dpl-workflow.sh#L335). + +`--RTCdeterministic` enables the [Deterministic Mode](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/deterministic-mode.md) (compile-time setting) for RTC. Usually you don't need to bother, as for the deterministic mode it is autoenabled from `--PROCdeterministicGPUReconstruction`, but the explicit `--RTCdeterministic` is available for tests. + +Finally, `--RTCoptConstexpr` and `--RTCoptSpecialCode` enable the constexpr and code removal optimizations.
For an example how the TPC V/M shape corrections are removed, see [TPCFastTransform.h](https://github.com/AliceO2Group/AliceO2/blob/fc3ace17eca580c338751163ef4528e3ec47f9d6/GPU/TPCFastTransformation/TPCFastTransform.h#L445). diff --git a/prodtests/full-system-test/documentation/README.md b/prodtests/full-system-test/documentation/README.md new file mode 100644 index 0000000000000..1fdef1da36ecd --- /dev/null +++ b/prodtests/full-system-test/documentation/README.md @@ -0,0 +1,17 @@ +[full-system-test.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test.md) : +- Full system test quick start guide + +[full-system-test-setup.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md) : +- More detailed description of full-system-test scripts, simulation of data set, and script to run the workflow + +[full-system-test-as-stress-test.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md) : +- Details on how to use the full system test as stress test and for validation of an EPN online compute node + +[dpl-workflow-options.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/dpl-workflow-options.md) : +- Description of the main workflow script [dpl-workflow.sh](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/dpl-workflow.sh) and its options. 
+ +[env-variables.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/env-variables.md) : +- List of common environment variables used by the workflow scripts (defaults set by https://github.com/davidrohr/O2DPG/blob/master/DATA/common/setenv.sh) + +[raw-tf-conversion.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/raw-tf-conversion.md) : +- This is automated in a script now, but just in case details how readout files are converted to a .tf file for usage in the full system test with replay from DataDistribution. diff --git a/prodtests/full-system-test/documentation/env-variables.md b/prodtests/full-system-test/documentation/env-variables.md index b93622c0a0f94..5a13f2ee9e19d 100644 --- a/prodtests/full-system-test/documentation/env-variables.md +++ b/prodtests/full-system-test/documentation/env-variables.md @@ -1,4 +1,4 @@ -The `setenv-sh` script sets the following environment options +The [setenv-sh](https://github.com/davidrohr/O2DPG/blob/master/DATA/common/setenv.sh) script sets the following environment options * `NTIMEFRAMES`: Number of time frames to process. * `TFDELAY`: Delay in seconds between publishing time frames (1 / rate). * `NGPUS`: Number of GPUs to use, data distributed round-robin. @@ -25,7 +25,7 @@ The `setenv-sh` script sets the following environment options * `EXTINPUT`: Receive input from raw FMQ channel instead of running o2-raw-file-reader. * 0: `dpl-workflow.sh` can run as standalone benchmark, and will read the input itself. * 1: To be used in combination with either `datadistribution.sh` or `raw-reader.sh` or with another DataDistribution instance. -* `CTFINPUT`: Read input from CTF ROOT file. This option is incompatible to EXTINPUT=1. The CTF ROOT file can be stored via SAVECTF=1. +* `CTFINPUT`: Read input from CTF ROOT file. This option is incompatible to `EXTINPUT=1`. The CTF ROOT file can be stored via `SAVECTF=1`. 
* `NHBPERTF`: Time frame length (in HBF) * `GLOBALDPLOPT`: Global DPL workflow options appended to o2-dpl-run. * `EPNPIPELINES`: Set default EPN pipeline multiplicities. diff --git a/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md b/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md index 0c4637ece0920..c78d81b236c1c 100644 --- a/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md +++ b/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md @@ -7,7 +7,7 @@ This is a quick summary how to run the full system test (FST) as stress test on - Enter the O2PDPSuite environment either vie `alienv enter O2PDPSuite/latest Readout/latest`. - Go to an empty directory. - Run the FST simulation via: `NEvents=650 NEventsQED=10000 SHMSIZE=128000000000 TPCTRACKERSCRATCHMEMORY=40000000000 SPLITTRDDIGI=0 GENERATE_ITSMFT_DICTIONARIES=1 $O2_ROOT/prodtests/full_system_test.sh` - - Get a current matbud.root (e.g. from here https://alice.its.cern.ch/jira/browse/O2-2288) and place it in that folder. + - Material budget table (e.g. from here https://alice.its.cern.ch/jira/browse/O2-2288) now comes from CCDB, no need any more to pull it manually. - Create a timeframe file from the raw files: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. 
- Prepare the ramdisk folder: `mv raw/timeframe raw/timeframe-org; mkdir raw/timeframe-tmpfs; ln -s timeframe-tmpfs raw/timeframe` diff --git a/prodtests/full-system-test/documentation/full-system-test-setup.md b/prodtests/full-system-test/documentation/full-system-test-setup.md index 82ef9b7d0c74f..e90a3984dd3da 100644 --- a/prodtests/full-system-test/documentation/full-system-test-setup.md +++ b/prodtests/full-system-test/documentation/full-system-test-setup.md @@ -16,7 +16,7 @@ If you just want to test a small dataset, you can skip the following steps, and - I'd suggest to do a first small test with 1-5 events to check the machinery, 100 events is already a good size which should not exhaust the memory, I'd go to 600 only after 100 works. 1. Compile O2 with GPU support, in addition you need O2sim, DataDistribution, and Readout (latest versions from alidist will do). GPUs for O2 should be auto-detected, but you can set the environment variables ALIBUILD_ENABLE_CUDA / ALIBUILD_ENABLE_HIP to enforce it (and get a failure when detection fails). Look for CMake log messages "Building GPUTracking with CUDA support" (etc) to verify. - For more information, see https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build.md + For more information, see https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build-O2.md 1. Optionally place some binary configuration files in the simulation folder. Default objects will be used if no such files are placed. There are instructions at the end of this post how to generate these files. (Currently, these files are: matbud.root, ITSdictionary.bin, ctf_dictionary.root, tpctransform.root, dedxsplines.root, and tpcpadgaincalib.root) 1. 
Load the O2sim environment (`alienv enter O2sim/latest`) and run the following full system test script for a full simulation and digits to raw conversion (this will already include 1 CPU reconstruction run): ``` @@ -37,7 +37,7 @@ If you just want to test a small dataset, you can skip the following steps, and ``` This will use 4 GPU with the HIP backend and allocate 22 GB of scratch memory on the GPU (should be sufficient for 128 orbit TF). You can change the GPU type as indicated in the linked README.md above, e.g. `GPUTYPE=CUDA NGPUS=1` for 1 CUDA GPU. 1. With this, the full chain is running inside O2 DPL. Next we are adding DataDistribution. - 1. Ceate the TF files as explained in the subtask (https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/raw-data-simulation.md). For convenience, there is a script that should do it automatically, from a shell that has loaded both DataDistribution and Readout: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. + 1. Create the TF files as explained in the subtask ([raw-tf-conversion.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/raw-tf-conversion.md)). For convenience, there is a script that should do it automatically, from a shell that has loaded both DataDistribution and Readout: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. 1. Enter the O2 environment, and run the following script (please adjust the variables as in the test before).
``` EXTINPUT=1 SHMSIZE=128000000000 GPUTYPE=CPU $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh diff --git a/prodtests/full-system-test/documentation/raw-data-simulation.md b/prodtests/full-system-test/documentation/raw-tf-conversion.md similarity index 100% rename from prodtests/full-system-test/documentation/raw-data-simulation.md rename to prodtests/full-system-test/documentation/raw-tf-conversion.md From 175d0147d5ecd5dcf776a774f8da28a6e893d509 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 25 Apr 2025 13:19:21 +0200 Subject: [PATCH 0306/1764] GPU: Add documentation for TPC Tracking QA --- GPU/documentation/README.md | 3 ++ GPU/documentation/tpc-tracking-qa.md | 78 ++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 GPU/documentation/tpc-tracking-qa.md diff --git a/GPU/documentation/README.md b/GPU/documentation/README.md index de888ab6e2436..0f6ba79df63ce 100644 --- a/GPU/documentation/README.md +++ b/GPU/documentation/README.md @@ -11,3 +11,6 @@ [run-time-compilation.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/run-time-compilation.md) : - Instructions how to use run time compilation (RTC) for the GPU code. + +[tpc-tracking-qa.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/tpc-tracking-qa.md) : +- Instructions how to run the QA for TPC tracking (efficiency, resolution, etc.). diff --git a/GPU/documentation/tpc-tracking-qa.md b/GPU/documentation/tpc-tracking-qa.md new file mode 100644 index 0000000000000..4487d10d0fe70 --- /dev/null +++ b/GPU/documentation/tpc-tracking-qa.md @@ -0,0 +1,78 @@ +This is a quick documentation on the TPC Tracking QA for Resolution, Efficiency, and Cluster Attachment based on MC data. + +The TPC QA can produce 3 different output types: +* *mergeable* histograms: A collection of ROOT histograms that can be merged from different inputs. They need to be postprocessed to get meaningful output.
+* *postprocessed* histograms: Histograms showing the efficiencies, resolutions etc. These histograms can no longer be merged from multiple inputs. +* *layouts*: TCanvases with multiple postprocessed histograms arranged in reasonable layouts. + +The TPC Tracking QA consists of multiple QA subtasks ((de)activated via a bitmask): +* *Efficiency* / *Clone Rate* / *Fake Rate* (1) +* *Resolution* (2) +* *Pulls* (4) +* *Cluster Attachment Statistics* (8) +* *nClusters and pt distribution* (16) +* *Cluster rejection counts* (32) (both as aggregate text report and as histogram) + +The TPC QA can run in 3 different ways: +* *Standalone* inside the tracking (o2-tpc-reco-workflow), it will write its output in pdf format to the plots folder in the current directory: supports *all subtasks* and will always produce the *layouts* output. +* As *external source* to QC: the QA is running inside the o2-tpc-reco-workflow and ships the histograms to QC via DPL: supports *all subtasks*, can produce the *postprocessed* or the *layouts* output. +* As *independent* QC tasks, operating on DPL input, that can e.g. be read from ROOT files: supports *subtasks* *1*, *2*, and *4*, currently is hardcoded to the *mergeable* output, but could be made configurable. +_(Note: the reason that the independent QC supports fewer subtasks is that the other tasks require internal tracking data structures that are not available a posteriori.)_ + +Remark on the *Cluster Rejection count histograms* for the *online QC*: +* These are mainly meant for monitoring the TPC compression during data taking without MC information, while most other subtasks rely on MC information. +* These are always in the mergeable format, the postprocessing will just forward them. +* By default they are disabled and only aggregate text output shows the rejection ratios, they must be enabled explicitly as explained below.
+ +Running the TPC QA standalone: +* It must be enabled via the configKeyValue GPU_proc.runQA in the o2-tpc-reco-workflow. The QA will run as part of the normal TPC tracking and will have access to all data structures of the tracking. Otherwise, the normal settings for the o2-tpc-reco-workflow apply. +* Example to run on digits: +{code}o2-tpc-reco-workflow -b --infile tpcdigits.root --configKeyValues "GPU_proc.runQA=1;" --output-type clusters,tracks{code} +* Example to run on clusters: +{code}o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type tracks --configKeyValues "GPU_proc.runQA=1"{code} + +Running the TPC QA inside the o2-tpc-reco-workflow as external source for qc: +* As in the standalone mode, the TPC Tracking QA will run as part of the o2-tpc-reco-workflow with full access to the tracking data structures. +* The output is shipped to QC as external qc-input in the form of ROOT histograms, which disables some subtasks that do not produce ROOT histograms (such as the cluster counts (32)). +* A merged workflow of the o2-tpc-reco-workflow and qc must be configured, such as: +{code}o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type tracks,qa | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/tpcQCTrackingFromExternal_direct.json{code} +(Note that by default, the output will be uploaded and visible at https://qcg-test.cern.ch/) +* By default, this will create postprocessed histograms, and it can be switched via configKeyValues to layouts output via: +{code}o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type tracks,qa --configKeyValues "GPU_QA.shipToQCAsCanvas=true" | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/tpcQCTrackingFromExternal_direct.json{code} +* This mode can be combined with the standalone QA mode. +* Running the *online cluster rejection histogram QA*: +** This mode works also without MC information, it must be enabled explicitly.
Irrespective of the output mode, it will always furnish the same mergeable histograms, since there is no postprocessing. An example to run them on ROOT files: +{code}o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type tracks,qa --disable-mc --configKeyValues "GPU_QA.clusterRejectionHistograms=1" | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/tpcQCTrackingFromExternal_direct.json{code} +** In order to set the x-axis scale (number of clusters), use the configKeyValue GPU_QA.histMaxNClusters. + +Running the QA as independent QC task: +* In this mode, the QA runs independently from the o2-tpc-reco-workflow, getting clusters, tracks, and MC labels via DPL. +* The example below uses the track reader and the reco workflow to fetch the input and ship them via DPL, but the inputs can of course also come from other sources. +* This mode is mostly foreseen for the mergeable output. In this way, many instances can run in parallel and the output can be merged before being postprocessed. +* Currently, the mergeable output is hardcoded, but the task could easily be extended for other outputs (see below). +* To run the tasks on ROOT file input, you can use the following example: +{code}o2-tpc-track-reader | o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type disable-writer | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/tpcQCTracking_direct.json{code} + +Postprocessing the mergeable output: +* The o2::tpc::qc::Tracking class can be used for the postprocessing in a standalone way. +* Initialize the class with the settings postprocessOnly = true and outputMode = outputPostprocessed or outputLayout. +* Call the postprocess(...) function, passing in std::vectors of the 3 types of ROOT histograms (TH1F, TH2F, TH1D) which are used by the QA. Note that the order of the histograms must be the same as obtained in the mergeable output.
+* Depending on the outputMode setting, the class will fill the out object with either the postprocessed histograms or the canvas layouts. + +The following classes in O2 / QC belong to the TPC tracking QA: +* o2::gpu::GPUQA (O2/GPU/GPUTracking/Standalone/qa/GPUQA.cxx): The main QA class, which can produce the standalone output, or can run with external input driven from the o2::gpu::GPUO2InterfaceQA. +* o2::gpu::GPUO2InterfaceQA (O2/GPU/GPUTracking/Interface/GPUO2InterfaceQA.cxx): Internal interface class, uses o2::gpu::GPUQA. +* o2::tpc::qc::Tracking (O2/Detectors/TPC/qc/src/Tracking.cxx): The main QC class for running the TPC QA independent from the o2-tpc-reco-workflow, uses o2::gpu::GPUO2InterfaceQA. +** Can produce all output types from tracks / clusters as input. +** Can produce postprocessed / layout output from mergeable input. +** Is limited to subtasks that do not require access to the internal tracking data structures. +* o2::quality_control_modules::tpc::Tracking (QC/Modules/TPC/src/Tracking.cxx): Mostly a QC wrapper for o2::tpc::qc::Tracking, with the necessary framework code to receive clusters / tracks / MC labels via DPL. + +Several additional settings can be configured via configKeyValues as listed in https://github.com/AliceO2Group/AliceO2/blob/dev/Detectors/TPC/qc/include/TPCQC/Tracking.h#L44: +* "GPU_QA.strict=[bool]" Strict QA mode: Only consider resolution of tracks where the fit ended within 5 cm of the reference, and remove outliers. (Default: true) +* "GPU_QA.qpt=[float]" Set cut for Q/Pt. (Default: 10.0) +* "GPU_QA.recThreshold=[float]" Compute the efficiency including impure tracks with fake contamination.
(Default: 0.9) +* "GPU_QA.maxResX=[float]" Maximum X (~radius) for reconstructed track position to take into account for resolution QA in cm (Default: no limit) +* "GPU_QA.nativeFitResolutions=[bool]" Create resolution histograms in the native fit units (sin(phi), tan(lambda), Q/Pt) (Default: false) +* "GPU_QA.filterCharge=[int]" Filter for positive (+1) or negative (-1) charge (Default: no filter) +* "GPU_QA.filterPID=[int]" Filter for Particle Type (0 Electron, 1 Muon, 2 Pion, 3 Kaon, 4 Proton) (Default: no filter) From 3eadf367abe68bd24ac39c6eb3ea1470dd596663 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 29 Apr 2025 10:52:37 +0200 Subject: [PATCH 0307/1764] dpl-workflow.sh: Add GEN_TOPO_NO_TF_RATE_UPSCALING setting --- GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 2 +- prodtests/full-system-test/dpl-workflow.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index d2d06df7b5710..db7a3b5884a12 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -117,7 +117,7 @@ class GPUTPCGMPropagator GPUd() float PredictChi2(float posY, float posZ, float err2Y, float err2Z) const; GPUd() int32_t RejectCluster(float chiY, float chiZ, uint8_t clusterState) { - if (chiY > 9.f || chiZ > 9.f) { + if (chiY > 9.f || chiZ > 9.f) { // TODO: Check how a track can have chi2/ncl > 18 return 2; } if ((chiY > 6.25f || chiZ > 6.25f) && (clusterState & (GPUTPCGMMergedTrackHit::flagSplit | GPUTPCGMMergedTrackHit::flagShared))) { diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index bb2712bedd92e..ebe6410d2c804 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -51,7 +51,11 @@ if [[ $EPNSYNCMODE == 1 ]] || type numactl >/dev/null 2>&1 && [[ `numactl -H | g [[ $NUMAGPUIDS != 0 ]] && ARGS_ALL+=" --child-driver
'numactl --membind $NUMAID --cpunodebind $NUMAID'" fi if [[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && [[ $DIGITINPUT != 1 ]]; then - RECO_NUM_NODES_WORKFLOW_CMP=$(($RECO_NUM_NODES_WORKFLOW > 15 ? ($RECO_NUM_NODES_WORKFLOW < 230 ? $RECO_NUM_NODES_WORKFLOW : 230) : 15)) + if [[ ${GEN_TOPO_NO_TF_RATE_UPSCALING:-0} == 1 ]]; then + RECO_NUM_NODES_WORKFLOW_CMP=$RECO_NUM_NODES_WORKFLOW + else + RECO_NUM_NODES_WORKFLOW_CMP=$(($RECO_NUM_NODES_WORKFLOW > 15 ? ($RECO_NUM_NODES_WORKFLOW < 230 ? $RECO_NUM_NODES_WORKFLOW : 230) : 15)) + fi TIMEFRAME_RATE_LIMIT=$((12 * 230 / ${RECO_NUM_NODES_WORKFLOW_CMP} * ($NUMAGPUIDS != 0 ? 1 : 2) * 128 / $NHBPERTF)) [[ $BEAMTYPE != "PbPb" && ${HIGH_RATE_PP:-0} == 0 ]] && TIMEFRAME_RATE_LIMIT=$(($TIMEFRAME_RATE_LIMIT * 3)) ! has_detector TPC && TIMEFRAME_RATE_LIMIT=$(($TIMEFRAME_RATE_LIMIT * 4)) From 2482a563ec50e43d51e1edba40dbfdf89ba36e21 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Wed, 30 Apr 2025 16:09:20 +0200 Subject: [PATCH 0308/1764] dpl-workflow.sh: make some MCH config key values fixed default --- prodtests/full-system-test/dpl-workflow.sh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index ebe6410d2c804..b51594115154c 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -355,16 +355,14 @@ has_detector_reco MID && has_detector_matching MCHMID && MFTMCHConf="FwdMatching if has_processing_step MUON_SYNC_RECO; then [[ -z ${ARGS_EXTRA_PROCESS_o2_mid_reco_workflow:-} ]] && ARGS_EXTRA_PROCESS_o2_mid_reco_workflow="--mid-tracker-keep-best" [[ -z ${ARGS_EXTRA_PROCESS_o2_mch_reco_workflow:-} ]] && ARGS_EXTRA_PROCESS_o2_mch_reco_workflow="--digits" - if [[ -z ${CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow:-} ]]; then - if [[ $IS_SIMULATED_DATA == 1 ]]; then - 
CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow="MCHTimeClusterizer.peakSearchSignalOnly=false;MCHDigitFilter.rejectBackground=false;" - elif [[ $RUNTYPE == "PHYSICS" && $BEAMTYPE == "pp" ]] || [[ $RUNTYPE == "COSMICS" ]]; then - CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow="MCHTracking.chamberResolutionX=0.4;MCHTracking.chamberResolutionY=0.4;MCHTracking.sigmaCutForTracking=7.;MCHTracking.sigmaCutForImprovement=6.;" - fi - has_detector_reco ITS && [[ $RUNTYPE != "COSMICS" ]] && CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow+="MCHTimeClusterizer.irFramesOnly=true;" - [[ ! -z ${CUT_RANDOM_FRACTION_MCH:-} ]] && CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow+="MCHTimeClusterizer.rofRejectionFraction=$CUT_RANDOM_FRACTION_MCH;" - CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow+="MCHStatusMap.useHV=false;MCHDigitFilter.statusMask=3;" + if [[ $IS_SIMULATED_DATA == 1 ]]; then + MCH_CONFIG_KEY+="MCHTimeClusterizer.peakSearchSignalOnly=false;MCHDigitFilter.rejectBackground=false;" + elif [[ $RUNTYPE == "PHYSICS" && $BEAMTYPE == "pp" ]] || [[ $RUNTYPE == "COSMICS" ]]; then + MCH_CONFIG_KEY+="MCHTracking.chamberResolutionX=0.4;MCHTracking.chamberResolutionY=0.4;MCHTracking.sigmaCutForTracking=7.;MCHTracking.sigmaCutForImprovement=6.;" fi + has_detector_reco ITS && [[ $RUNTYPE != "COSMICS" ]] && MCH_CONFIG_KEY+="MCHTimeClusterizer.irFramesOnly=true;" + [[ ! 
-z ${CUT_RANDOM_FRACTION_MCH:-} ]] && MCH_CONFIG_KEY+="MCHTimeClusterizer.rofRejectionFraction=$CUT_RANDOM_FRACTION_MCH;" + MCH_CONFIG_KEY+="MCHStatusMap.useHV=false;MCHDigitFilter.statusMask=3;" [[ $RUNTYPE == "COSMICS" ]] && [[ -z ${CONFIG_EXTRA_PROCESS_o2_mft_reco_workflow:-} ]] && CONFIG_EXTRA_PROCESS_o2_mft_reco_workflow="MFTTracking.FullClusterScan=true" fi [[ $SYNCRAWMODE == 1 ]] && [[ -z ${CONFIG_EXTRA_PROCESS_o2_zdc_digits_reco:-} ]] && CONFIG_EXTRA_PROCESS_o2_zdc_digits_reco='RecoParamZDC.tdc_calib[9]=1;RecoParamZDC.tdc_calib[0]=1;RecoParamZDC.tdc_calib[8]=1;RecoParamZDC.tdc_calib[1]=1;RecoParamZDC.tdc_calib[3]=1;RecoParamZDC.tdc_calib[6]=1;RecoParamZDC.tdc_calib[5]=1;RecoParamZDC.tdc_calib[4]=1;RecoParamZDC.tdc_calib[2]=1;RecoParamZDC.tdc_calib[7]=1;RecoParamZDC.energy_calib[13]=1;RecoParamZDC.energy_calib[12]=1;RecoParamZDC.energy_calib[11]=1;RecoParamZDC.energy_calib[6]=1;RecoParamZDC.energy_calib[25]=1;RecoParamZDC.energy_calib[14]=1;RecoParamZDC.energy_calib[20]=1;RecoParamZDC.energy_calib[5]=1;RecoParamZDC.energy_calib[0]=1;RecoParamZDC.energy_calib[19]=1;RecoParamZDC.tower_calib[1]=1;RecoParamZDC.tower_calib[2]=1;RecoParamZDC.tower_calib[3]=1;RecoParamZDC.tower_calib[4]=1;RecoParamZDC.tower_calib[24]=1;RecoParamZDC.tower_calib[21]=1;RecoParamZDC.tower_calib[22]=1;RecoParamZDC.tower_calib[23]=1;RecoParamZDC.tower_calib[18]=1;RecoParamZDC.tower_calib[16]=1;RecoParamZDC.tower_calib[17]=1;RecoParamZDC.tower_calib[15]=1;RecoParamZDC.tower_calib[8]=1;RecoParamZDC.tower_calib[9]=1;RecoParamZDC.tower_calib[7]=1;RecoParamZDC.tower_calib[10]=1' From 82782fd2272801c9b0a961a1b2204c949091dd69 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 2 May 2025 11:26:58 +0200 Subject: [PATCH 0309/1764] DPL Analysis: fix case in which booleans are stored in more than one chunk (#14230) --- Framework/AnalysisSupport/src/TTreePlugin.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index 4481c06a8c6d9..90b862e5fc8d1 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -209,7 +209,7 @@ auto readBoolValues = [](uint8_t* target, ReadOps& op, TBufferFile& rootBuffer) int readLast = 0; while (readEntries < op.rootBranchEntries) { auto beginValue = readLast; - auto readLast = op.branch->GetBulkRead().GetBulkEntries(readEntries, rootBuffer); + readLast = op.branch->GetBulkRead().GetBulkEntries(readEntries, rootBuffer); int size = readLast * op.listSize; readEntries += readLast; for (int i = beginValue; i < beginValue + size; ++i) { From 77beb78fb1f066153157ee06738e1405544d381d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 2 May 2025 12:56:31 +0200 Subject: [PATCH 0310/1764] DPL Analysis: modernize expression parsing code (#14185) --- .../Core/include/Framework/Expressions.h | 31 +++--- Framework/Core/src/Expressions.cxx | 100 ++++++++---------- 2 files changed, 65 insertions(+), 66 deletions(-) diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index af89e56f85835..9e00388ee5df8 100644 --- a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -69,6 +69,7 @@ struct ExpressionInfo { namespace o2::framework::expressions { +void unknownParameterUsed(const char* name); const char* stringType(atype::type t); template @@ -147,7 +148,7 @@ struct PlaceholderNode : LiteralNode { if constexpr (variant_trait_v::type> != VariantType::Unknown) { retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{context.options().get(name)}; }; } else { - runtime_error("Unknown parameter used in expression."); + unknownParameterUsed(name.c_str()); } } @@ -188,6 +189,19 @@ struct ParameterNode : LiteralNode { struct ConditionalNode { }; +/// concepts +template +concept 
is_literal_like = std::same_as || std::same_as || std::same_as; + +template +concept is_binding = std::same_as; + +template +concept is_operation = std::same_as; + +template +concept is_conditional = std::same_as; + /// A generic tree node struct Node { Node(LiteralNode&& v) : self{std::forward(v)}, left{nullptr}, right{nullptr}, condition{nullptr} @@ -267,7 +281,7 @@ struct NodeRecord { /// Tree-walker helper template -void walk(Node* head, L const& pred) +void walk(Node* head, L&& pred) { std::stack path; path.emplace(head, 0); @@ -512,16 +526,15 @@ inline Node binned(std::vector const& binning, std::vector const& paramete } template -Node updateParameters(Node const& pexp, int bins, std::vector const& parameters, int bin) +inline Node updateParameters(Node const& pexp, int bins, std::vector const& parameters, int bin) { Node result{pexp}; - auto updateParameter = [&bins, &parameters, &bin](Node* node) { + walk(&result, [&bins, &parameters, &bin](Node* node) { if (node->self.index() == 5) { auto* n = std::get_if<5>(&node->self); n->reset(parameters[n->index * bins + bin]); } - }; - walk(&result, updateParameter); + }); return result; } @@ -594,12 +607,6 @@ gandiva::ExpressionPtr makeExpression(gandiva::NodePtr node, gandiva::FieldPtr r /// Update placeholder nodes from context void updatePlaceholders(Filter& filter, InitContext& context); -template -std::vector makeProjectors(framework::pack) -{ - return {C::Projector()...}; -} - std::shared_ptr createProjectorHelper(size_t nColumns, expressions::Projector* projectors, std::shared_ptr schema, std::vector> const& fields); diff --git a/Framework/Core/src/Expressions.cxx b/Framework/Core/src/Expressions.cxx index 6f646515b7837..94649f8639a0a 100644 --- a/Framework/Core/src/Expressions.cxx +++ b/Framework/Core/src/Expressions.cxx @@ -24,6 +24,10 @@ using namespace o2::framework; namespace o2::framework::expressions { +void unknownParameterUsed(const char* name) +{ + runtime_error_f("Unknown parameter used in expression: %s",
name); +} /// a map between BasicOp and gandiva node definitions /// note that logical 'and' and 'or' are created separately @@ -89,43 +93,41 @@ size_t Filter::designateSubtrees(Node* node, size_t index) return index; } -namespace +template +constexpr inline auto makeDatum(T const&) { -struct LiteralNodeHelper { - DatumSpec operator()(LiteralNode const& node) const - { - return DatumSpec{node.value, node.type}; - } -}; + return DatumSpec{}; +} -struct BindingNodeHelper { - DatumSpec operator()(BindingNode const& node) const - { - return DatumSpec{node.name, node.hash, node.type}; - } -}; +template +constexpr inline auto makeDatum(T const& node) +{ + return DatumSpec{node.value, node.type}; +} -struct OpNodeHelper { - ColumnOperationSpec operator()(OpNode const& node) const - { - return ColumnOperationSpec{node.op}; - } -}; +template +constexpr inline auto makeDatum(T const& node) +{ + return DatumSpec{node.name, node.hash, node.type}; +} -struct PlaceholderNodeHelper { - DatumSpec operator()(PlaceholderNode const& node) const - { - return DatumSpec{node.value, node.type}; - } -}; +template +constexpr inline auto makeOp(T const&, size_t const&) +{ + return ColumnOperationSpec{}; +} -struct ParameterNodeHelper { - DatumSpec operator()(ParameterNode const& node) const - { - return DatumSpec{node.value, node.type}; - } -}; -} // namespace +template +constexpr inline auto makeOp(T const& node, size_t const& index) +{ + return ColumnOperationSpec{node.op, index}; +} + +template +constexpr inline auto makeOp(T const&, size_t const& index) +{ + return ColumnOperationSpec{BasicOp::Conditional, index}; +} std::shared_ptr concreteArrowType(atype::type type) { @@ -169,7 +171,7 @@ std::string upcastTo(atype::type f) case atype::DOUBLE: return "castFLOAT8"; default: - throw runtime_error_f("Do not know how to cast to %d", f); + throw runtime_error_f("Do not know how to cast to %s", stringType(f)); } } @@ -196,13 +198,11 @@ std::ostream& operator<<(std::ostream& os, DatumSpec 
const& spec) void updatePlaceholders(Filter& filter, InitContext& context) { - auto updateNode = [&](Node* node) { + expressions::walk(filter.node.get(), [&](Node* node) { if (node->self.index() == 3) { std::get_if<3>(&node->self)->reset(context); } - }; - - expressions::walk(filter.node.get(), updateNode); + }); } const char* stringType(atype::type t) @@ -246,12 +246,7 @@ Operations createOperations(Filter const& expression) auto processLeaf = [](Node const* const node) { return std::visit( - overloaded{ - [lh = LiteralNodeHelper{}](LiteralNode const& node) { return lh(node); }, - [bh = BindingNodeHelper{}](BindingNode const& node) { return bh(node); }, - [ph = PlaceholderNodeHelper{}](PlaceholderNode const& node) { return ph(node); }, - [pr = ParameterNodeHelper{}](ParameterNode const& node) { return pr(node); }, - [](auto&&) { return DatumSpec{}; }}, + [](auto const& n) { return makeDatum(n); }, node->self); }; @@ -266,10 +261,7 @@ Operations createOperations(Filter const& expression) // create operation spec, pop the node and add its children auto operationSpec = std::visit( - overloaded{ - [&](OpNode node) { return ColumnOperationSpec{node.op, top.node_ptr->index}; }, - [&](ConditionalNode) { return ColumnOperationSpec{BasicOp::Conditional, top.node_ptr->index}; }, - [](auto&&) { return ColumnOperationSpec{}; }}, + [&](auto const& n) { return makeOp(n, top.node_ptr->index); }, top.node_ptr->self); operationSpec.result = DatumSpec{top.index, operationSpec.type}; @@ -623,15 +615,15 @@ gandiva::NodePtr createExpressionTree(Operations const& opSpecs, auto rightNode = datumNode(it->right); auto condNode = datumNode(it->condition); - auto insertUpcastNode = [&](gandiva::NodePtr node, atype::type t) { - if (t != it->type) { - auto upcast = gandiva::TreeExprBuilder::MakeFunction(upcastTo(it->type), {node}, concreteArrowType(it->type)); + auto insertUpcastNode = [](gandiva::NodePtr node, atype::type t0, atype::type t) { + if (t != t0) { + auto upcast = 
gandiva::TreeExprBuilder::MakeFunction(upcastTo(t0), {node}, concreteArrowType(t0)); node = upcast; } return node; }; - auto insertEqualizeUpcastNode = [&](gandiva::NodePtr& node1, gandiva::NodePtr& node2, atype::type t1, atype::type t2) { + auto insertEqualizeUpcastNode = [](gandiva::NodePtr& node1, gandiva::NodePtr& node2, atype::type t1, atype::type t2) { if (t2 > t1) { auto upcast = gandiva::TreeExprBuilder::MakeFunction(upcastTo(t2), {node1}, concreteArrowType(t2)); node1 = upcast; @@ -656,14 +648,14 @@ gandiva::NodePtr createExpressionTree(Operations const& opSpecs, default: if (it->op < BasicOp::Sqrt) { if (it->type != atype::BOOL) { - leftNode = insertUpcastNode(leftNode, it->left.type); - rightNode = insertUpcastNode(rightNode, it->right.type); + leftNode = insertUpcastNode(leftNode, it->type, it->left.type); + rightNode = insertUpcastNode(rightNode, it->type, it->right.type); } else if (it->op == BasicOp::Equal || it->op == BasicOp::NotEqual) { insertEqualizeUpcastNode(leftNode, rightNode, it->left.type, it->right.type); } temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode, rightNode}, concreteArrowType(it->type)); } else { - leftNode = insertUpcastNode(leftNode, it->left.type); + leftNode = insertUpcastNode(leftNode, it->type, it->left.type); temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode}, concreteArrowType(it->type)); } break; From e3fdb85e058e0112369e163260c6ca170e37365b Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 2 May 2025 13:41:58 +0200 Subject: [PATCH 0311/1764] DPL: fix reading of booleans from branches with more than 2 baskets. 
(#14231) --- Framework/AnalysisSupport/src/TTreePlugin.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index 90b862e5fc8d1..28fd713112c94 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -208,7 +208,7 @@ auto readBoolValues = [](uint8_t* target, ReadOps& op, TBufferFile& rootBuffer) memset(target, 0, op.targetBuffer->size()); int readLast = 0; while (readEntries < op.rootBranchEntries) { - auto beginValue = readLast; + auto beginValue = readEntries; readLast = op.branch->GetBulkRead().GetBulkEntries(readEntries, rootBuffer); int size = readLast * op.listSize; readEntries += readLast; From 0938b3554fdce42d98b681bf173c9484b6b8784e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 5 May 2025 15:16:58 +0200 Subject: [PATCH 0312/1764] GPU Standalone: Build OrtInterface only when ONNX is available --- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 +- GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index c112be6abac11..8ce95c8e96d3a 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -139,7 +139,6 @@ endif() add_subdirectory(../../ GPU) add_library(standalone_support SHARED ${O2_DIR}/Common/Field/src/MagFieldFast.cxx - ${O2_DIR}/Common/ML/src/OrtInterface.cxx ${O2_DIR}/Common/Utils/src/StringUtils.cxx ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx @@ -228,6 +227,7 @@ endif() if(GPUCA_CONFIG_ONNX) target_link_libraries(standalone_support PRIVATE onnxruntime::onnxruntime) + target_sources(standalone_support PRIVATE ${O2_DIR}/Common/ML/src/OrtInterface.cxx) endif() if (GPUCA_BUILD_DEBUG_SANITIZE AND 
CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index f82c537956ead..f6866bb80da05 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -13,7 +13,7 @@ /// \author David Rohr // Run e.g. as (replacing [FILE] and [OUTPUT]: -// echo -e '#define PARAMETER_FILE "[FILE]]"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("[OUTPUT]")\n.q\n' | root -l -b +// echo -e '#define PARAMETER_FILE "[FILE]"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("[OUTPUT]")\n.q\n' | root -l -b // To dump the defaults for AMPERE architecture, run // echo -e '#define GPUCA_GPUTYPE_AMPERE\n#define PARAMETER_FILE "GPUDefParametersDefaults.h"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("default_AMPERE.par")\n.q\n' | root -l -b From e786dc0644d91601d319b6a2bc25aed40eb12769 Mon Sep 17 00:00:00 2001 From: Hadi Hassan Date: Tue, 6 May 2025 03:03:10 +0300 Subject: [PATCH 0313/1764] [FOCAL-55] Open the detector in the middle in x (#14232) * Open the detector in the middle in x * Formatting --- .../FOCAL/base/include/FOCALBase/Geometry.h | 6 ++ Detectors/FOCAL/base/src/Geometry.cxx | 14 +++- .../geometryFiles/geometry_Spaghetti.txt | 4 ++ Detectors/FOCAL/simulation/src/Detector.cxx | 72 +++++++++++++++---- 4 files changed, 80 insertions(+), 16 deletions(-) diff --git a/Detectors/FOCAL/base/include/FOCALBase/Geometry.h b/Detectors/FOCAL/base/include/FOCALBase/Geometry.h index 4938ebb1925dd..770c0aa3c1cf5 100644 --- a/Detectors/FOCAL/base/include/FOCALBase/Geometry.h +++ b/Detectors/FOCAL/base/include/FOCALBase/Geometry.h @@ -135,6 +135,9 @@ class Geometry bool getInsertFrontPadLayers() const { return mInsertFrontPadLayers; } bool getInsertHCalReadoutMaterial() const { return 
mInsertFrontHCalReadoutMaterial; } + float getDetectorOpeningRight() const { return mGlobal_DetectorOpening_Right; } + float getDetectorOpeningLeft() const { return mGlobal_DetectorOpening_Left; } + std::vector getFOCALMicroModule(int layer) const; const Composition* getComposition(int layer, int stack) const; std::string_view getTowerGapMaterial() const { return mGlobal_Gap_Material; } @@ -175,6 +178,9 @@ class Geometry float mWaferSizeX = 0.0; // Wafer X size float mWaferSizeY = 0.0; // Wafer Y size + float mGlobal_DetectorOpening_Right = 0.0; // detector opening in X (right side) + float mGlobal_DetectorOpening_Left = 0.0; // detector opening in X (left side) + // PIX setup float mGlobal_Pixel_Size = 0.0; // pixel size float mGlobal_PIX_SizeX = 0.0; // sensor size X diff --git a/Detectors/FOCAL/base/src/Geometry.cxx b/Detectors/FOCAL/base/src/Geometry.cxx index 94d8c2cee049c..db47816ba8537 100644 --- a/Detectors/FOCAL/base/src/Geometry.cxx +++ b/Detectors/FOCAL/base/src/Geometry.cxx @@ -351,6 +351,16 @@ void Geometry::setParameters(std::string geometryfile) LOG(debug) << "Z-Location of the FoCAL is set to : " << mGlobal_FOCAL_Z0; } + if (command.find("DetectorOpen_Right") != std::string::npos) { + mGlobal_DetectorOpening_Right = std::stof(tokens[1]); + LOG(debug) << "Detector opening on the right : " << mGlobal_DetectorOpening_Right; + } + + if (command.find("DetectorOpen_Left") != std::string::npos) { + mGlobal_DetectorOpening_Left = std::stof(tokens[1]); + LOG(debug) << "Detector opening on the left : " << mGlobal_DetectorOpening_Left; + } + if (command.find("HCAL_TOWER_SIZE") != std::string::npos) { mGlobal_HCAL_Tower_Size = std::stof(tokens[1]); LOG(debug) << "The size of the HCAL readout tower will be : " << mGlobal_HCAL_Tower_Size; @@ -578,8 +588,8 @@ void Geometry::setParameters(std::string geometryfile) } } } // end for itowerY - } // end for itowerX - } // end else + } // end for itowerX + } // end else center_z += tmpComp.getThickness(); } // end loop over pad layer
compositions LOG(debug) << "============ Created all pad layer compositions (" << mPadCompositionBase.size() << " volumes)"; diff --git a/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt b/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt index 5209204cc5eca..31f6940224337 100644 --- a/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt +++ b/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt @@ -59,6 +59,10 @@ COMMAND_INSERT_PIX_AT_L9 GLOBAL_TOWER_TOLX 0.02 Air GLOBAL_TOWER_TOLY 0.8 Al GLOBAL_FOCAL_Z 763.5 +# Open the detector on the right and left in cm, +# can only work if the GLOBAL_HCAL_TOWER_NY is odd number and GLOBAL_HCAL_TOWER_NX is even number +# GLOBAL_DetectorOpen_Right 5 +# GLOBAL_DetectorOpen_Left 5 GLOBAL_Tower_NX 2 GLOBAL_Tower_NY 11 GLOBAL_MIDDLE_TOWER_OFFSET 5 diff --git a/Detectors/FOCAL/simulation/src/Detector.cxx b/Detectors/FOCAL/simulation/src/Detector.cxx index 08df253d49f83..dc71c1066afdf 100644 --- a/Detectors/FOCAL/simulation/src/Detector.cxx +++ b/Detectors/FOCAL/simulation/src/Detector.cxx @@ -539,6 +539,8 @@ void Detector::CreateHCALSpaghetti() } } + bool splitDet = mGeometry->getDetectorOpeningRight() > 0.0 || mGeometry->getDetectorOpeningLeft() > 0.0; + double TowerSize = mGeometry->getHCALTowerSize(); double CuBoxThickness = 0.3; // Thickness of the Cu box carrying capillary tubes @@ -598,25 +600,57 @@ void Detector::CreateHCALSpaghetti() Columns = 0; RowPos = 0.; Int_t NumTowers = 1; - for (Rows = 0; Rows < nTowersY; Rows++) { - float ColumnPos = 0.; - RowPos = Rows * TowerSize; - for (Columns = 0; Columns < nTowersX; Columns++) { - ColumnPos = Columns * TowerSize; - TGeoTranslation* trans = new TGeoTranslation(ColumnPos - SizeXHCAL / 2 + TowerSize / 2, RowPos - SizeYHCAL / 2 + TowerSize / 2, 0.); + if (splitDet) { + SizeXHCAL = SizeXHCAL / 2; - // Remove the Towers that overlaps with the beam pipe - Double_t RadialDistance = TMath::Power(trans->GetTranslation()[0], 2) + 
TMath::Power(trans->GetTranslation()[1], 2); + TGeoVolumeAssembly* volHalfHCAL = new TGeoVolumeAssembly("HalfHCAL"); - if (RadialDistance < MinRadius * MinRadius || TMath::Abs(trans->GetTranslation()[0]) > SizeXHCAL / 2) { - continue; + for (Rows = 0; Rows < nTowersY; Rows++) { + + float ColumnPos = 0.; + RowPos = Rows * TowerSize; + for (Columns = 0; Columns < nTowersX / 2; Columns++) { + ColumnPos = Columns * TowerSize; + TGeoTranslation* trans = new TGeoTranslation(ColumnPos - SizeXHCAL / 2 + TowerSize / 2, RowPos - SizeYHCAL / 2 + TowerSize / 2, 0.); + + // Shift the beampipe towers by TowerSize/2 + if (Rows == nTowersY / 2) { + trans->SetDx(trans->GetTranslation()[0] + TowerSize / 2); + } + + // Adding the Tower to the HCAL + volHalfHCAL->AddNode(volTowerHCAL, NumTowers, trans); + + NumTowers++; } + volHCAL->AddNode(volHalfHCAL, 1, new TGeoTranslation(SizeXHCAL / 2 + mGeometry->getDetectorOpeningRight(), 0, 0)); + TGeoRotation* rotFlipZ = new TGeoRotation(); + rotFlipZ->RotateY(180); // Flip around Y to reverse Z + TGeoCombiTrans* combHalf = new TGeoCombiTrans(-SizeXHCAL / 2 - mGeometry->getDetectorOpeningLeft(), 0., 0., rotFlipZ); + volHCAL->AddNode(volHalfHCAL, 2, combHalf); + } + } else { + for (Rows = 0; Rows < nTowersY; Rows++) { - // Adding the Tower to the HCAL - volHCAL->AddNode(volTowerHCAL, NumTowers, trans); + float ColumnPos = 0.; + RowPos = Rows * TowerSize; + for (Columns = 0; Columns < nTowersX; Columns++) { + ColumnPos = Columns * TowerSize; + TGeoTranslation* trans = new TGeoTranslation(ColumnPos - SizeXHCAL / 2 + TowerSize / 2, RowPos - SizeYHCAL / 2 + TowerSize / 2, 0.); - NumTowers++; + // Remove the Towers that overlaps with the beam pipe + Double_t RadialDistance = TMath::Power(trans->GetTranslation()[0], 2) + TMath::Power(trans->GetTranslation()[1], 2); + + if (RadialDistance < MinRadius * MinRadius || TMath::Abs(trans->GetTranslation()[0]) > SizeXHCAL / 2) { + continue; + } + + // Adding the Tower to the HCAL +
volHCAL->AddNode(volTowerHCAL, NumTowers, trans); + + NumTowers++; + } } } @@ -791,6 +825,8 @@ void Detector::CreateECALGeometry() // this shifts all the pixel layers to the center near the beampipe double pixshift = geom->getTowerSizeX() - (geom->getGlobalPixelWaferSizeX() * geom->getNumberOfPIXsInX()); + bool splitDet = mGeometry->getDetectorOpeningRight() > 0.0 || mGeometry->getDetectorOpeningLeft() > 0.0; + float offset = pars[2]; // gMC->Gsvolu("EMSC1", "BOX", idtmed[3698], pars, 4);//Left towers (pixels shifted right) // gMC->Gsvolu("EMSC2", "BOX", idtmed[3698], pars, 4);//Right towers (pixels shifted left) @@ -977,9 +1013,13 @@ void Detector::CreateECALGeometry() // const auto towerCenter = geom->getGeoTowerCenter(number); //only ECAL part, second parameter = -1 by default // xp = std::get<0>towerCenter; // std::tie(xp, yp, zp) = geom->getGeoTowerCenter(number); - const auto [xp, yp, zp] = geom->getGeoTowerCenter(number); // only ECAL part, second parameter = -1 by default + auto [xp, yp, zp] = geom->getGeoTowerCenter(number); // only ECAL part, second parameter = -1 by default if (itowerx == 0) { + if (splitDet) { + xp -= geom->getDetectorOpeningLeft(); + } + TVirtualMC::GetMC()->Gspos("EMSC1", number + 1, "ECAL", xp, yp, 0, 0, "ONLY"); // Add the SiPad front volumes directly under the FOCAL volume if (geom->getInsertFrontPadLayers()) { @@ -992,6 +1032,10 @@ void Detector::CreateECALGeometry() } } if (itowerx == 1) { + if (splitDet) { + xp += geom->getDetectorOpeningRight(); + } + TVirtualMC::GetMC()->Gspos("EMSC2", number + 1, "ECAL", xp, yp, 0, 0, "ONLY"); // Add the SiPad front volumes directly under the FOCAL volume if (geom->getInsertFrontPadLayers()) { From 69f1fd10feb52387174f8b5024d7a5afbdf02dd2 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Mon, 5 May 2025 22:00:05 +0800 Subject: [PATCH 0314/1764] ITS3: Fix APTS response file generation Was unnecessarily generated on every built. 
--- Detectors/Upgrades/ITS3/data/CMakeLists.txt | 29 ++++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/Detectors/Upgrades/ITS3/data/CMakeLists.txt b/Detectors/Upgrades/ITS3/data/CMakeLists.txt index ba8b60c8aa7eb..7a807fd670370 100644 --- a/Detectors/Upgrades/ITS3/data/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/data/CMakeLists.txt @@ -9,17 +9,26 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. +set(APTS_RESPONSE_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root") + +add_custom_command( + OUTPUT ${APTS_RESPONSE_OUTPUT} + COMMAND ${CMAKE_BINARY_DIR}/stage/bin/o2-alpide-response-generator + -c APTS + -i ${ITSRESPONSE_DIR}/response/ITS3ChipResponseData/AptsResponseData/ + -o ${CMAKE_CURRENT_BINARY_DIR}/ + DEPENDS GenerateAlpideResponse + ${ITSRESPONSE_DIR}/response/ITS3ChipResponseData/AptsResponseData/ + COMMENT "Generating APTSResponseData.root" + VERBATIM +) + add_custom_target( GenerateAPTSResponse ALL - COMMAND - ${CMAKE_BINARY_DIR}/stage/bin/o2-alpide-response-generator -c APTS -i - ${ITSRESPONSE_DIR}/response/ITS3ChipResponseData/AptsResponseData/ -o - ${CMAKE_CURRENT_BINARY_DIR}/ - BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root - DEPENDS GenerateAlpideResponse - COMMENT "Generating APTSResponseData.root") + DEPENDS ${APTS_RESPONSE_OUTPUT} +) + install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root" - DESTINATION - "${CMAKE_INSTALL_PREFIX}/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/" + FILES ${APTS_RESPONSE_OUTPUT} + DESTINATION "${CMAKE_INSTALL_PREFIX}/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/" ) From 15b4f5f19e1eef23d79bdb3225e36348a845722d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 6 May 2025 08:56:19 +0200 Subject: [PATCH 0315/1764] DPL Analysis: prevent slice cache from updating when not required by enabled process functions (#14057) --- Framework/Core/include/Framework/ASoA.h | 
12 ++--- .../Core/include/Framework/AnalysisManagers.h | 14 ++++-- .../Core/include/Framework/AnalysisTask.h | 22 ++++----- .../Framework/ArrowTableSlicingCache.h | 45 +++++++++++------ .../Core/include/Framework/GroupSlicer.h | 2 +- Framework/Core/src/ASoA.cxx | 2 +- Framework/Core/src/ArrowSupport.cxx | 19 +++---- Framework/Core/src/ArrowTableSlicingCache.cxx | 49 ++++++++++--------- Framework/Core/test/test_GroupSlicer.cxx | 4 +- 9 files changed, 96 insertions(+), 73 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index e098cd89f6d5d..2e478a8ca64a6 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1400,10 +1400,10 @@ namespace o2::framework struct PreslicePolicyBase { const std::string binding; - StringPair bindingKey; + Entry bindingKey; bool isMissing() const; - StringPair const& getBindingKey() const; + Entry const& getBindingKey() const; }; struct PreslicePolicySorted : public PreslicePolicyBase { @@ -1428,7 +1428,7 @@ struct PresliceBase : public Policy { const std::string binding; PresliceBase(expressions::BindingNode index_) - : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, std::make_pair(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} + : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, Entry(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} { } @@ -1508,7 +1508,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; @@ -1545,7 +1545,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr 
(OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } auto selection = container.getSliceFor(value); @@ -1574,7 +1574,7 @@ auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 330eaf01f0be4..e310f3eef990c 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -534,39 +534,43 @@ static void setGroupedCombination(C& comb, TG& grouping, std::tuple& asso /// Preslice handling template requires(!is_preslice) -bool registerCache(T&, std::vector&, std::vector&) +bool registerCache(T&, Cache&, Cache&) { return false; } template requires std::same_as -bool registerCache(T& preslice, std::vector& bsks, std::vector&) +bool registerCache(T& preslice, Cache& bsks, Cache&) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsks.end()) { bsks.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + locate->enabled = true; } return true; } template requires std::same_as -bool registerCache(T& preslice, std::vector&, std::vector& bsksU) +bool registerCache(T& preslice, Cache&, Cache& bsksU) { if constexpr (T::optional) { 
if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsksU.end()) { bsksU.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + locate->enabled = true; } return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index c7f3da1948c62..9bd2e2af173cc 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -66,20 +66,20 @@ concept is_enumeration = is_enumeration_v>; namespace { struct AnalysisDataProcessorBuilder { template - static void addGroupingCandidates(std::vector& bk, std::vector& bku) + static void addGroupingCandidates(Cache& bk, Cache& bku, bool enabled) { - [&bk, &bku](framework::pack) mutable { + [&bk, &bku, enabled](framework::pack) mutable { std::string key; if constexpr (soa::is_iterator>) { key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); } - ([&bk, &bku, &key]() mutable { + ([&bk, &bku, &key, enabled]() mutable { if constexpr (soa::relatedByIndex, std::decay_t>()) { auto binding = soa::getLabelFromTypeForKey>(key); if constexpr (o2::soa::is_smallgroups>) { - framework::updatePairList(bku, binding, key); + framework::updatePairList(bku, binding, key, enabled); } else { - framework::updatePairList(bk, binding, key); + framework::updatePairList(bk, binding, key, enabled); } } }(), @@ -147,7 +147,7 @@ struct AnalysisDataProcessorBuilder { /// helper to parse the process arguments /// 1. 
enumeration (must be the only argument) template - static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, Cache&, Cache&) { std::vector inputMetadata; // FIXME: for the moment we do not support begin, end and step. @@ -156,17 +156,17 @@ struct AnalysisDataProcessorBuilder { /// 2. grouping case - 1st argument is an iterator template - static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) + static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, Cache& bk, Cache& bku) requires(std::is_lvalue_reference_v && (std::is_lvalue_reference_v && ...)) { - addGroupingCandidates(bk, bku); + addGroupingCandidates(bk, bku, value); constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions::parent_t, Args...>(hash, name, value, inputs, eInfos); } /// 3. generic case template - static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, Cache&, Cache&) requires(std::is_lvalue_reference_v && ...) { constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); @@ -480,8 +480,8 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) std::vector inputs; std::vector options; std::vector expressionInfos; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; + Cache bindingsKeys; + Cache bindingsKeysUnsorted; /// make sure options and configurables are set before expression infos are created homogeneous_apply_refs([&options, &hash](auto& element) { return analysis_task_parsers::appendOption(options, element); }, *task.get()); diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 2edc23a63ce76..292a67023fc5e 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -34,51 +34,64 @@ struct SliceInfoUnsortedPtr { gsl::span getSliceFor(int value) const; }; -using StringPair = std::pair; +struct Entry { + std::string binding; + std::string key; + bool enabled; + + Entry(std::string b, std::string k, bool e = true) + : binding{b}, + key{k}, + enabled{e} + { + } +}; + +using Cache = std::vector; -void updatePairList(std::vector& list, std::string const& binding, std::string const& key); +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled); struct ArrowTableSlicingCacheDef { constexpr static ServiceKind service_kind = ServiceKind::Global; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; + Cache bindingsKeys; + Cache bindingsKeysUnsorted; - void setCaches(std::vector&& bsks); - void setCachesUnsorted(std::vector&& bsks); + void setCaches(Cache&& bsks); + void setCachesUnsorted(Cache&& bsks); }; struct ArrowTableSlicingCache { constexpr static ServiceKind service_kind = ServiceKind::Stream; - std::vector bindingsKeys; + Cache bindingsKeys; std::vector>> values; std::vector>> counts; - std::vector bindingsKeysUnsorted; + Cache bindingsKeysUnsorted; std::vector> valuesUnsorted; std::vector groups; - ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted = 
{}); + ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted = {}); // set caching information externally - void setCaches(std::vector&& bsks, std::vector&& bsksUnsorted = {}); + void setCaches(Cache&& bsks, Cache&& bsksUnsorted = {}); // update slicing info cache entry (assumes it is already present) arrow::Status updateCacheEntry(int pos, std::shared_ptr const& table); arrow::Status updateCacheEntryUnsorted(int pos, std::shared_ptr const& table); // helper to locate cache position - std::pair getCachePos(StringPair const& bindingKey) const; - int getCachePosSortedFor(StringPair const& bindingKey) const; - int getCachePosUnsortedFor(StringPair const& bindingKey) const; + std::pair getCachePos(Entry const& bindingKey) const; + int getCachePosSortedFor(Entry const& bindingKey) const; + int getCachePosUnsortedFor(Entry const& bindingKey) const; // get slice from cache for a given value - SliceInfoPtr getCacheFor(StringPair const& bindingKey) const; - SliceInfoUnsortedPtr getCacheUnsortedFor(StringPair const& bindingKey) const; + SliceInfoPtr getCacheFor(Entry const& bindingKey) const; + SliceInfoUnsortedPtr getCacheUnsortedFor(Entry const& bindingKey) const; SliceInfoPtr getCacheForPos(int pos) const; SliceInfoUnsortedPtr getCacheUnsortedForPos(int pos) const; - static void validateOrder(StringPair const& bindingKey, std::shared_ptr const& input); + static void validateOrder(Entry const& bindingKey, std::shared_ptr const& input); }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index 64b1d863c59e6..b8436314b057e 100644 --- a/Framework/Core/include/Framework/GroupSlicer.h +++ b/Framework/Core/include/Framework/GroupSlicer.h @@ -55,7 +55,7 @@ struct GroupSlicer { { constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); - auto bk = std::make_pair(binding, mIndexColumnName); + auto bk = 
Entry(binding, mIndexColumnName); if constexpr (!o2::soa::is_smallgroups>) { if (table.size() == 0) { return; diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index 810398747de88..5940bc0427225 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -197,7 +197,7 @@ bool PreslicePolicyBase::isMissing() const return binding == "[MISSING]"; } -StringPair const& PreslicePolicyBase::getBindingKey() const +Entry const& PreslicePolicyBase::getBindingKey() const { return bindingKey; } diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 12a4c7131e828..3b13e30581f70 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -567,26 +567,27 @@ o2::framework::ServiceSpec ArrowSupport::arrowTableSlicingCacheSpec() .name = "arrow-slicing-cache", .uniqueId = CommonServices::simpleServiceId(), .init = [](ServiceRegistryRef services, DeviceState&, fair::mq::ProgOptions&) { return ServiceHandle{TypeIdHelpers::uniqueId(), - new ArrowTableSlicingCache(std::vector>{services.get().bindingsKeys}, std::vector{services.get().bindingsKeysUnsorted}), + new ArrowTableSlicingCache(Cache{services.get().bindingsKeys}, + Cache{services.get().bindingsKeysUnsorted}), ServiceKind::Stream, typeid(ArrowTableSlicingCache).name()}; }, .configure = CommonServices::noConfiguration(), .preProcessing = [](ProcessingContext& pc, void* service_ptr) { auto* service = static_cast(service_ptr); auto& caches = service->bindingsKeys; - for (auto i = 0; i < caches.size(); ++i) { - if (pc.inputs().getPos(caches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < caches.size(); ++i) { + if (caches[i].enabled && pc.inputs().getPos(caches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw 
runtime_error_f("Failed to update slice cache for %s/%s", caches[i].first.c_str(), caches[i].second.c_str()); + throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].binding.c_str(), caches[i].key.c_str()); } } } auto& unsortedCaches = service->bindingsKeysUnsorted; - for (auto i = 0; i < unsortedCaches.size(); ++i) { - if (pc.inputs().getPos(unsortedCaches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < unsortedCaches.size(); ++i) { + if (unsortedCaches[i].enabled && pc.inputs().getPos(unsortedCaches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].first.c_str(), unsortedCaches[i].second.c_str()); + throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].binding.c_str(), unsortedCaches[i].key.c_str()); } } } }, diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 4b31f96e32fba..12df5ef6c080b 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -11,6 +11,7 @@ #include "Framework/ArrowTableSlicingCache.h" #include "Framework/RuntimeError.h" +#include "Framework/Logger.h" #include #include @@ -19,10 +20,10 @@ namespace o2::framework { -void updatePairList(std::vector& list, std::string const& binding, std::string const& key) +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled = true) { - if (std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.first == binding) && (entry.second == key); }) == list.end()) { - list.emplace_back(binding, key); + if (std::find_if(list.begin(), 
list.end(), [&binding, &key](auto const& entry) { return (entry.binding == binding) && (entry.key == key); }) == list.end()) { + list.emplace_back(binding, key, enabled); } } @@ -65,17 +66,17 @@ gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const return {(*groups)[value].data(), (*groups)[value].size()}; } -void ArrowTableSlicingCacheDef::setCaches(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCaches(Cache&& bsks) { bindingsKeys = bsks; } -void ArrowTableSlicingCacheDef::setCachesUnsorted(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCachesUnsorted(Cache&& bsks) { bindingsKeysUnsorted = bsks; } -ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted) +ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted) : bindingsKeys{bsks}, bindingsKeysUnsorted{bsksUnsorted} { @@ -86,7 +87,7 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, s groups.resize(bindingsKeysUnsorted.size()); } -void ArrowTableSlicingCache::setCaches(std::vector&& bsks, std::vector&& bsksUnsorted) +void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) { bindingsKeys = bsks; bindingsKeysUnsorted = bsksUnsorted; @@ -111,7 +112,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, - arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].second)}, + arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].key)}, &options)); auto pair = static_cast(value_counts.array()); values[pos].reset(); @@ -128,7 +129,11 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (table->num_rows() == 0) { return arrow::Status::OK(); } - auto& [b, k] = bindingsKeysUnsorted[pos]; + auto& [b, k, e] = bindingsKeysUnsorted[pos]; + if (!e) { + 
LOG(debug) << "Update of disabled cache requested"; + return arrow::Status::OK(); + } auto column = table->GetColumnByName(k); auto row = 0; for (auto iChunk = 0; iChunk < column->num_chunks(); ++iChunk) { @@ -139,7 +144,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (std::find(valuesUnsorted[pos].begin(), valuesUnsorted[pos].end(), v) == valuesUnsorted[pos].end()) { valuesUnsorted[pos].push_back(v); } - if (groups[pos].size() <= v) { + if ((int)groups[pos].size() <= v) { groups[pos].resize(v + 1); } (groups[pos])[v].push_back(row); @@ -151,7 +156,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st return arrow::Status::OK(); } -std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindingKey) const +std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey) const { auto pos = getCachePosSortedFor(bindingKey); if (pos != -1) { @@ -161,41 +166,41 @@ std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindi if (pos != -1) { return {pos, false}; } - throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } -int ArrowTableSlicingCache::getCachePosSortedFor(StringPair const& bindingKey) const +int ArrowTableSlicingCache::getCachePosSortedFor(Entry const& bindingKey) const { - auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate != bindingsKeys.end()) { return std::distance(bindingsKeys.begin(), locate); } return -1; } -int 
ArrowTableSlicingCache::getCachePosUnsortedFor(StringPair const& bindingKey) const +int ArrowTableSlicingCache::getCachePosUnsortedFor(Entry const& bindingKey) const { - auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate_unsorted != bindingsKeysUnsorted.end()) { return std::distance(bindingsKeysUnsorted.begin(), locate_unsorted); } return -1; } -SliceInfoPtr ArrowTableSlicingCache::getCacheFor(StringPair const& bindingKey) const +SliceInfoPtr ArrowTableSlicingCache::getCacheFor(Entry const& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (!s) { - throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheForPos(p); } -SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const StringPair& bindingKey) const +SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const Entry& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (s) { - throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheUnsortedForPos(p); @@ -224,9 +229,9 @@ SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedForPos(int pos) con }; } -void ArrowTableSlicingCache::validateOrder(StringPair const& bindingKey, const std::shared_ptr& input) +void ArrowTableSlicingCache::validateOrder(Entry const& bindingKey, const std::shared_ptr& input) { - 
auto const& [target, key] = bindingKey; + auto const& [target, key, enabled] = bindingKey; auto column = input->GetColumnByName(key); auto array0 = static_cast>(column->chunk(0)->data()); int32_t prev = 0; diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 161939141e790..091c21eeae229 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -683,7 +683,7 @@ TEST_CASE("ArrowDirectSlicing") std::vector slices; std::vector offsts; - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); auto s = cache.updateCacheEntry(0, {evtTable}); auto lcache = cache.getCacheFor(bk); @@ -741,7 +741,7 @@ TEST_CASE("TestSlicingException") } auto evtTable = builderE.finalize(); - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); try { From a8f5897522519647699a774697325e5e663619f5 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Tue, 6 May 2025 11:43:50 +0200 Subject: [PATCH 0316/1764] Remove tmp file (#14239) Trivial and unaffecting anything else, merging. 
--- .../src/.ThresholdCalibratorSpec.cxx.swo | Bin 16384 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 Detectors/ITSMFT/ITS/workflow/src/.ThresholdCalibratorSpec.cxx.swo diff --git a/Detectors/ITSMFT/ITS/workflow/src/.ThresholdCalibratorSpec.cxx.swo b/Detectors/ITSMFT/ITS/workflow/src/.ThresholdCalibratorSpec.cxx.swo deleted file mode 100644 index 847bb24d5cf5f12814270f83c821f9c726d7964b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeHOUyK_^8DA&`N-2$s656V$6G!6g^v=G!w9vRo)I0m`;wX2%-uaTINpxE8j_pm> zyW82Z@7yD8Xaf&CqoN`ncqoV`gb)IWrPI$uE-^~q%Xo;sj@sjTcMBKnlH}R zTz@SW3M*G>2p%?k*Dj!DRhTHe9B?aZt*>v>Vj@ZyNEqlbaEEbVazZWjeW|_d(|4`( zM9H^=frNpCfrNpCfrNpCfrNpCfrNqodj@21hw%(X`C&cMK!1K{;PX9wx%&Q-1M+vZ zJg)!80r?dzZ|eK=x`DWz|7!UeE&s`Y{2eWKw0w{5a9sawEw5_%GXwIswES@`e|13q zrj{Sm@`JZTUt;}lX!)X+KQ|!%kCvCT{5J#ge{14S;5Oh-cNoTH;K#s^fG-1QfYZQH;0wSwkONWx1MUKL0e1o~ebO+V2LuoR zcLRHX-N2RG4db`KuYqTQi@-C$IB*Z}^G_JYbHJB?`+?5__W{4W%`kolJPTX|Y5)fo zfjn>!xEFZu^-N5#zrNyaHSVz6fjpj{zm%R^T!c11|y= zunOz}b_1^?fAJddBJea&1g>BX-vVeZ-=O*2?qckM!3M8Ghgj;2w}1bBR&+wM>N;MX zjmS>GN0{TWCYPpdN|X658MGv;IW7-Vk0>?A%X8^9Q+TM6VWg(yQIWtDCaQ6vDvyyW z=J}}Vw>&$AnirU5N~^)rp|taPzHaeAI=;s&-wWmE7|*Z`r5~@CgkwjV^XNi_lpJP` zmGxEGFrCTPxjdvagi@djN75aTQ%V~F-r6rmdbQ{ zWi`^GBha8fna{VR~h!?N}d^ zKFj8I%5O9$W2pVU2<{*7H5l%-IF#byW7^DeL{8t_l1W`3+hh^Cmg3u~v3Mf-L~QS2 zmde?@o%33*n~JPzM^i&~MEZwLPfz3Ztb!hK?`x|1hqtrk&}(?;jCF_FGr{50CHyrk zeo4zF8aL}*wD#4ouqynz=RC#jQ5LuyFOsnqFk*9o7sjO1<|CtQ1Zwc%3pUblq=j99 z*274j(Q_eu5p5~DN$5u-`UjTk1;$+{-k@dMUAVK3)ceHIQl(H{S*Vm2=BTr}G?_`P z?=WnmuXSY|!*$J;D%DQrYfZqGo)g${T?gYIQw?9c&yjn^z?{mcF_j* z=c!VGXOhMN!Fv`Ib5N+f1UX9M&bfMt{^JWlW0i)~rJkDgY zr*vjZ^CLUm0FimtRF!=jMhJ@yU|6-RMf%h&Xow3;V;>nTLH9CS^0)1Zf9DUhDvMTxkQY^S^36G^0R!a-VSdL9**nXCtU}LN@Z^}kKKO<`C^!obg@w2!G zmkM7pD3=bO6f&(!)6I#qnT#UT_FGjPSX|Ur9oZ1+==XlMzCOz0@2H^0SgIUdqLR!B znc!_MLT*QEZ)mTsyF=`O{&CTC_jrzfMyjtM?D1s$+Vhb8TD|g&3N&o?sB%jf+p3&;H5%nW 
zS=g#J9Z3ViCQp8)>2=t-mT*GbQ5$NOHkGeI#yC8LHd1(8*KZ$S>AA(@>;#OJ&VmEj9xOj_&~tRB ziVlz;uT(0AQiby3DV+W(HOLBeCG=|&(F&$GWHxqt;Z$2Ras(v^Rj4xE6HdJWb||Xh>jBUgOz!^)FcgW~@c#G(%&KBf3Se*XL}V*V?@_kat)Dc~e<7}y2;3-SLyfY*S( z0)GL%1{8s5;1=M|i1D8XiohF)>AwR!0h|Ip54?jo{w3fNPy%iTUO^21E8qt}8#o7? z1?~kdA#Q&V_ygkdr+@>%WyIf?fWM*tXMiVwIe^CVAdp-M0|^5O0|^5+k%0>g$MGh= zXy6WcPMfnU8!7G~p~%NV_yhO|ob@qj))9D~FogrZgTkwPe%kllQC2e%(2uf>)C$}M zcq36&hVpgtCUAg7T5#w(VTVB%9r(6WcgPW;Q_u8F*RR8O2`$r0UBDGZf=plC?pSMM zmM6!o2Di>%UuWPKs9N2#nf@@1;g8nn%tampjs)dN;6~90`e)iU+!;757!i3DNI8V) z+9(2`-qr0G_ro3Z2WbR2q~#hClH7#b1J|hyhk=+LqS`|=z)kxq#c<|E*#r^3&}vrE zapm`@qKFuDs0rN@Lv&KoD9W=AOeMO6$wKUhVQi>$Y&X}sm72c%)KFmoCQD>3VU{qeMlXWRf3%}z(mJCPP8bUy(6XkizB+-n<890{KvkMoxKLZw9 z<&<0Z1r#(X@Yn#=ft*E}ykdsu4-Jp2kHo=wMjLj-U>I4sO0VnhDVYD5)%|PE)f7TS zmGqK+oKYB-3BYR6k!cZ+A{K>j!Q+np7*r)?bbDFe3|s_pt5wrFpQiK|2u(d!SgtJL z%Miu1KWtmKP+piVPM^5J+Q=GJZm{ZdVQS$<>noXW@KC>vu}n`V%roCflx^k60& zm{lZZbRjZoSw`H1NscCJergc|A0I{7SmOu@akxNdv;&hd4p&I#h-y7$bJLXqqNV9U zT@`3lO2aU%Cnmk7E5z5#^~;*-tAc95Ov%4u79pd#A7s75d+HZF)0lk3>;6j zXjKuBbf89*>cn4Mpx!Hiflx;0s)fjq`WwDIsA*RDv~5#wH#3#opUtAd_UDw5V#16+ z#Ul$&tGV2qHxH#Bq%+uZ0$<2Hc<$yyqlw33guOZsTp%A}#t$hwe2olxq?M8y#o{t{ mJwKo)k`3R^!?VH0)nS&4Gex-xt|>W^)^r}B!p*#hng0Qi4P(y$ From 1c7a558df3442e42971b3c4b03dace6796a6b946 Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 6 May 2025 11:44:43 +0200 Subject: [PATCH 0317/1764] Fix typos in rANS AlignedArrayIterator --- .../include/rANS/internal/containers/AlignedArray.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Utilities/rANS/include/rANS/internal/containers/AlignedArray.h b/Utilities/rANS/include/rANS/internal/containers/AlignedArray.h index df4b0190cdc4f..c1f96df939809 100644 --- a/Utilities/rANS/include/rANS/internal/containers/AlignedArray.h +++ b/Utilities/rANS/include/rANS/internal/containers/AlignedArray.h @@ -107,16 +107,16 @@ class AlignedArrayIterator inline constexpr difference_type operator-(const AlignedArrayIterator& other) const 
noexcept { - return this->mIter - other.mIter; + return this->mIndex - other.mIndex; }; // comparison inline constexpr bool operator==(const AlignedArrayIterator& other) const noexcept { return this->mIndex == other.mIndex; }; inline constexpr bool operator!=(const AlignedArrayIterator& other) const noexcept { return this->mIndex != other.mIndex; }; - inline constexpr bool operator<(const AlignedArrayIterator& other) const noexcept { return this->mIndex < other->mIndex; }; - inline constexpr bool operator>(const AlignedArrayIterator& other) const noexcept { return this->mIndex > other->mIndex; }; - inline constexpr bool operator>=(const AlignedArrayIterator& other) const noexcept { return this->mIndex >= other->mIndex; }; - inline constexpr bool operator<=(const AlignedArrayIterator& other) const noexcept { return this->mIndex <= other->mIndex; }; + inline constexpr bool operator<(const AlignedArrayIterator& other) const noexcept { return this->mIndex < other.mIndex; }; + inline constexpr bool operator>(const AlignedArrayIterator& other) const noexcept { return this->mIndex > other.mIndex; }; + inline constexpr bool operator>=(const AlignedArrayIterator& other) const noexcept { return this->mIndex >= other.mIndex; }; + inline constexpr bool operator<=(const AlignedArrayIterator& other) const noexcept { return this->mIndex <= other.mIndex; }; // dereference inline constexpr value_type operator*() const noexcept { return (*mContainer)[mIndex]; }; @@ -311,4 +311,4 @@ auto make_span(o2::rans::internal::simd::AlignedArray& array } // namespace gsl -#endif /* RANS_INTERNAL_CONTAINERS_ALIGNEDARRAY_H_ */ \ No newline at end of file +#endif /* RANS_INTERNAL_CONTAINERS_ALIGNEDARRAY_H_ */ From c4f4364b1c819dac3581db77f89c0968c661c7d3 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 6 May 2025 17:25:36 +0200 Subject: [PATCH 0318/1764] DPL: improve DataSpecUtils::describe API in case of buffers (#14238) Just like snprintf, it makes 
sense to return the size of the formatted output. --- .../Core/include/Framework/DataSpecUtils.h | 17 +++--- Framework/Core/src/DataSpecUtils.cxx | 58 ++++++++----------- .../Core/test/unittest_DataSpecUtils.cxx | 55 ++++++++++++++++++ 3 files changed, 88 insertions(+), 42 deletions(-) diff --git a/Framework/Core/include/Framework/DataSpecUtils.h b/Framework/Core/include/Framework/DataSpecUtils.h index 65f8585302aa7..588aa30da7e08 100644 --- a/Framework/Core/include/Framework/DataSpecUtils.h +++ b/Framework/Core/include/Framework/DataSpecUtils.h @@ -18,11 +18,12 @@ #include -namespace o2 -{ -namespace framework +namespace o2::framework { +template +concept HasMatcher = requires(T& t) { t.matcher; }; + struct DataSpecUtils { /// @return true if a given InputSpec @a spec matches with a @a target ConcreteDataMatcher static bool match(InputSpec const& spec, ConcreteDataMatcher const& target); @@ -152,10 +153,8 @@ struct DataSpecUtils { static bool validate(OutputSpec const& output); /// Same as the other describe, but uses a buffer to reduce memory churn. - static void describe(char* buffer, size_t size, InputSpec const& spec); - - /// Same as the other describe, but uses a buffer to reduce memory churn. - static void describe(char* buffer, size_t size, OutputSpec const& spec); + template + static size_t describe(char* buffer, size_t size, T const& spec); /// If possible extract the ConcreteDataMatcher from an InputSpec. 
This /// can be done either if the InputSpec is defined in terms for a ConcreteDataMatcher @@ -250,6 +249,6 @@ struct DataSpecUtils { static void updateOutputList(std::vector& list, OutputSpec&& input); }; -} // namespace framework -} // namespace o2 +} // namespace o2::framework + #endif // FRAMEWORK_DATASPECUTILS_H diff --git a/Framework/Core/src/DataSpecUtils.cxx b/Framework/Core/src/DataSpecUtils.cxx index 3babbaba2a6ca..48f5e6abcad5b 100644 --- a/Framework/Core/src/DataSpecUtils.cxx +++ b/Framework/Core/src/DataSpecUtils.cxx @@ -15,11 +15,13 @@ #include "Framework/RuntimeError.h" #include "Headers/DataHeaderHelpers.h" +#include #include #include #include #include #include +#include namespace o2::framework { @@ -87,39 +89,29 @@ std::string DataSpecUtils::describe(OutputSpec const& spec) spec.matcher); } -void DataSpecUtils::describe(char* buffer, size_t size, InputSpec const& spec) -{ - if (auto concrete = std::get_if(&spec.matcher)) { - char origin[5]; - origin[4] = 0; - char description[17]; - description[16] = 0; - snprintf(buffer, size, "%s/%s/%" PRIu32, (strncpy(origin, concrete->origin.str, 4), origin), - (strncpy(description, concrete->description.str, 16), description), concrete->subSpec); - } else if (auto matcher = std::get_if(&spec.matcher)) { - std::ostringstream ss; - ss << ""; - strncpy(buffer, ss.str().c_str(), size - 1); - } else { - throw runtime_error("Unsupported InputSpec"); - } -} - -void DataSpecUtils::describe(char* buffer, size_t size, OutputSpec const& spec) -{ - if (auto concrete = std::get_if(&spec.matcher)) { - char origin[5]; - origin[4] = 0; - char description[17]; - description[16] = 0; - snprintf(buffer, size, "%s/%s/%" PRIu32, (strncpy(origin, concrete->origin.str, 4), origin), - (strncpy(description, concrete->description.str, 16), description), concrete->subSpec); - } else if (auto concrete = std::get_if(&spec.matcher)) { - fmt::format_to(buffer, "", concrete->origin, concrete->description); - } else { - throw 
runtime_error("Unsupported OutputSpec"); - } -} +template +size_t DataSpecUtils::describe(char* buffer, size_t size, T const& spec) +{ + auto result = std::visit(overloaded{ + [buffer, size](ConcreteDataMatcher const& concrete) -> fmt::format_to_n_result { + return fmt::format_to_n(buffer, size - 1, "{:.4}/{:.16}/{}", concrete.origin.str, concrete.description.str, concrete.subSpec); + }, + [buffer, size](ConcreteDataTypeMatcher const& concrete) -> fmt::format_to_n_result { + return fmt::format_to_n(buffer, size - 1, "", concrete.origin, concrete.description); + }, + [buffer, size](DataDescriptorMatcher const& matcher) -> fmt::format_to_n_result { + std::ostringstream ss; + ss << ""; + return fmt::format_to_n(buffer, size - 1, "{}", ss.str()); + }, + [](...) -> fmt::format_to_n_result { throw std::runtime_error("Unsupported Input / Output Spec"); }}, + spec.matcher); + *result.out = '\0'; + return result.out - buffer; +} + +template size_t DataSpecUtils::describe(char* buffer, size_t size, InputSpec const& spec); +template size_t DataSpecUtils::describe(char* buffer, size_t size, OutputSpec const& spec); std::string DataSpecUtils::label(InputSpec const& spec) { diff --git a/Framework/Core/test/unittest_DataSpecUtils.cxx b/Framework/Core/test/unittest_DataSpecUtils.cxx index e6b2f4a22c018..6128183aefa11 100644 --- a/Framework/Core/test/unittest_DataSpecUtils.cxx +++ b/Framework/Core/test/unittest_DataSpecUtils.cxx @@ -42,6 +42,7 @@ TEST_CASE("ConcreteData") CHECK(std::string(concrete.description.as()) == "FOOO"); CHECK(concrete.subSpec == 1); CHECK(DataSpecUtils::describe(spec) == "TEST/FOOO/1"); + CHECK(DataSpecUtils::describe(spec) == "TEST/FOOO/1"); CHECK(*DataSpecUtils::getOptionalSubSpec(spec) == 1); ConcreteDataTypeMatcher dataType = DataSpecUtils::asConcreteDataTypeMatcher(spec); @@ -59,6 +60,44 @@ TEST_CASE("ConcreteData") } } +TEST_CASE("DescribeUsingBuffer") +{ + o2::framework::clean_all_runtime_errors(); + OutputSpec spec{ + "TEST", + "FOOO", + 1, + 
Lifetime::Timeframe}; + + InputSpec inputSpec{ + "binding", + "TEST", + "FOOO", + 1, + Lifetime::Timeframe}; + + REQUIRE(DataSpecUtils::validate(inputSpec)); + + { + char buffer[1024]; + + ConcreteDataMatcher concrete = DataSpecUtils::asConcreteDataMatcher(spec); + CHECK(std::string(concrete.origin.as()) == "TEST"); + CHECK(std::string(concrete.description.as()) == "FOOO"); + CHECK(concrete.subSpec == 1); + auto size = DataSpecUtils::describe(buffer, 1024, spec); + CHECK(std::string_view(buffer, size) == "TEST/FOOO/1"); + size = DataSpecUtils::describe(buffer, 1024, spec); + CHECK(std::string_view(buffer, size) == "TEST/FOOO/1"); + CHECK(*DataSpecUtils::getOptionalSubSpec(spec) == 1); + + char buffer2[1024]; + size = DataSpecUtils::describe(buffer2, 5, spec); + // We always nullterminate the string + CHECK(std::string_view(buffer2, size) == "TEST"); + } +} + TEST_CASE("WithWildCards") { OutputSpec spec{ @@ -78,6 +117,22 @@ TEST_CASE("WithWildCards") CHECK(DataSpecUtils::getOptionalSubSpec(spec) == std::nullopt); } +TEST_CASE("WithWildCardsBuffer") +{ + char buffer[1024]; + OutputSpec spec{ + {"TEST", "FOOO"}, + Lifetime::Timeframe}; + + auto size = DataSpecUtils::describe(buffer, 1024, spec); + CHECK(std::string_view(buffer, size) == ""); + + char buffer2[1024]; + size = DataSpecUtils::describe(buffer2, 5, spec); + // We always null terminate the buffer. 
+ CHECK(std::string_view(buffer2, size) == " Date: Tue, 6 May 2025 17:26:41 +0200 Subject: [PATCH 0319/1764] ITS3: ITS3 Digitisation Development after TDR (#14145) * ITS3 digitization: parameters, segmentation and container fixes - Add digitization parameter sets - Fix C2F/F2C conversion in SegmentationMosaix - Set scale function for Alpide as IB - Fix chip digits container initialization - Correct ordering of maxRows and maxCols - Add support for floating row/column numbers in D2L * Introduce ChipSimResponse with response-centre extraction logic Add more info in CreateDic macro * Add a macro to compare ITS3 clusters and digits on a pixel array Add the script to check hits and clusters on a track Add script for visualizing chip responses * Address reviewer comments --- .../ITSMFTSimulation/AlpideSimResponse.h | 5 +- .../include/ITS3Base/SegmentationMosaix.h | 89 +-- .../ITS3/base/include/ITS3Base/SpecsV2.h | 1 - .../Upgrades/ITS3/macros/test/CMakeLists.txt | 3 + .../ITS3/macros/test/CheckChipResponseFile.C | 192 ++++++ .../ITS3/macros/test/CheckDigitsITS3.C | 2 - .../test/CompareClustersAndDigitsOnChip.C | 579 ++++++++++++++++ .../ITS3/macros/test/CorrTracksClusters.C | 638 ++++++++++++++++++ .../ITS3/macros/test/CreateDictionariesITS3.C | 22 +- .../Upgrades/ITS3/simulation/CMakeLists.txt | 8 +- .../ITS3Simulation/ChipDigitsContainer.h | 59 ++ .../include/ITS3Simulation/ChipSimResponse.h | 41 ++ .../include/ITS3Simulation/DigiParams.h | 28 +- .../include/ITS3Simulation/Digitizer.h | 9 +- .../ITS3Simulation/ITS3DPLDigitizerParam.h | 32 + .../simulation/src/ChipDigitsContainer.cxx | 63 ++ .../ITS3/simulation/src/ChipSimResponse.cxx | 62 ++ .../ITS3/simulation/src/DigiParams.cxx | 62 +- .../ITS3/simulation/src/Digitizer.cxx | 84 ++- .../simulation/src/ITS3DPLDigitizerParam.cxx | 14 + .../simulation/src/ITS3SimulationLinkDef.h | 3 + .../src/ITS3DigitizerSpec.cxx | 7 + 22 files changed, 1886 insertions(+), 117 deletions(-) create mode 100644 
Detectors/Upgrades/ITS3/macros/test/CheckChipResponseFile.C create mode 100644 Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigitsOnChip.C create mode 100644 Detectors/Upgrades/ITS3/macros/test/CorrTracksClusters.C create mode 100644 Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipDigitsContainer.h create mode 100644 Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipSimResponse.h create mode 100644 Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3DPLDigitizerParam.h create mode 100644 Detectors/Upgrades/ITS3/simulation/src/ChipDigitsContainer.cxx create mode 100644 Detectors/Upgrades/ITS3/simulation/src/ChipSimResponse.cxx create mode 100644 Detectors/Upgrades/ITS3/simulation/src/ITS3DPLDigitizerParam.cxx diff --git a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h index 92656a16257a1..5714b51d5aa45 100644 --- a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h +++ b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h @@ -38,7 +38,7 @@ class AlpideRespSimMat static int constexpr getNPix() { return NPix; } AlpideRespSimMat() = default; - ~AlpideRespSimMat() = default; + virtual ~AlpideRespSimMat() = default; void adopt(const AlpideRespSimMat& src, bool flipRow = false, bool flipCol = false) { @@ -69,7 +69,7 @@ class AlpideRespSimMat private: std::array data; - ClassDefNV(AlpideRespSimMat, 1); + ClassDef(AlpideRespSimMat, 1); }; /* @@ -91,6 +91,7 @@ class AlpideSimResponse int getDepthBin(float pos) const; std::string composeDataName(int colBin, int rowBin); + protected: int mNBinCol = 0; /// number of bins in X(col direction) int mNBinRow = 0; /// number of bins in Y(row direction) int mNBinDpt = 0; /// number of bins in Z(sensor dept) diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h 
b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h index f8d4a784120a0..fbf9a59e6da4b 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h +++ b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h @@ -12,12 +12,11 @@ /// \file SegmentationMosaix.h /// \brief Definition of the SegmentationMosaix class /// \author felix.schlepper@cern.ch +/// \author chunzheng.wang@cern.ch #ifndef ALICEO2_ITS3_SEGMENTATIONMOSAIX_H_ #define ALICEO2_ITS3_SEGMENTATIONMOSAIX_H_ -#include - #include "MathUtils/Cartesian.h" #include "ITS3Base/SpecsV2.h" @@ -43,24 +42,22 @@ class SegmentationMosaix // 3. The detector coordinate system. Defined by the row and column segmentation // defined at the upper edge in the flat coord. - // row,col=0 - // | - // v - // x----------------------x - // | | | - // | | | - // | | | ^ x - // | | | | - // | | | | - // | | | | - // |-----------X----------| X marks (x,z)=(0,0) X----> z - // | | | + // O----------------------| // | | | + // | | | ^ x + // | | | | + // | | | | + // | | | | + // | | | X----> z X marks (x,z)=(0,0) + // |-----------X----------| + // | | | O----> col O marks (row,col)=(0,0) + // | | | | + // | | | | + // | | | v + // | | | row // | | | - // | | | - // | | | - // | | | - // x----------------------x + // |----------------------| + public: constexpr SegmentationMosaix(int layer) : mRadius(static_cast(constants::radiiMiddle[layer])) {} constexpr ~SegmentationMosaix() = default; @@ -79,7 +76,6 @@ class SegmentationMosaix static constexpr float PitchCol{constants::pixelarray::pixels::mosaix::pitchZ}; static constexpr float PitchRow{constants::pixelarray::pixels::mosaix::pitchX}; static constexpr float SensorLayerThickness{constants::totalThickness}; - static constexpr float NominalYShift{constants::nominalYShift}; /// Transformation from the curved surface to a flat surface. 
/// Additionally a shift in the flat coordinates must be applied because @@ -102,10 +98,10 @@ class SegmentationMosaix // stack float dist = std::hypot(xCurved, yCurved); float phi = std::atan2(yCurved, xCurved); - xFlat = (mRadius * phi) - WidthH; // the y position is in the silicon volume however we need the chip volume (silicon+metalstack) // this is accounted by a y shift - yFlat = dist - mRadius + NominalYShift; + xFlat = WidthH - mRadius * phi; + yFlat = dist - mRadius; } /// Transformation from the flat surface to a curved surface @@ -122,11 +118,12 @@ class SegmentationMosaix { // MUST align the flat surface with the curved surface with the original pixel array is on and account for metal // stack + float dist = yFlat + mRadius; + float phi = (WidthH - xFlat) / mRadius; // the y position is in the chip volume however we need the silicon volume // this is accounted by a -y shift - float dist = yFlat - NominalYShift + mRadius; - xCurved = dist * std::cos((xFlat + WidthH) / mRadius); - yCurved = dist * std::sin((xFlat + WidthH) / mRadius); + xCurved = dist * std::cos(phi); + yCurved = dist * std::sin(phi); } /// Transformation from Geant detector centered local coordinates (cm) to @@ -142,8 +139,11 @@ class SegmentationMosaix /// \param int iCol Detector z cell coordinate. constexpr bool localToDetector(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept { + if (!isValidLoc(xRow, zCol)) { + return false; + } localToDetectorUnchecked(xRow, zCol, iRow, iCol); - if (!isValid(iRow, iCol)) { + if (!isValidDet(iRow, iCol)) { iRow = iCol = -1; return false; } @@ -167,49 +167,54 @@ class SegmentationMosaix /// center of the sensitive volume. /// If iRow and or iCol is outside of the segmentation range a value of -0.5*Dx() /// or -0.5*Dz() is returned. 
- constexpr bool detectorToLocal(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept + bool detectorToLocal(float const row, float const col, float& xRow, float& zCol) const noexcept { - if (!isValid(iRow, iCol)) { + if (!isValidDet(row, col)) { return false; } - detectorToLocalUnchecked(iRow, iCol, xRow, zCol); - return isValid(xRow, zCol); + detectorToLocalUnchecked(row, col, xRow, zCol); + return isValidLoc(xRow, zCol); } // Same as detectorToLocal w.o. checks. // We position ourself in the middle of the pixel. - constexpr void detectorToLocalUnchecked(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept + void detectorToLocalUnchecked(float const row, float const col, float& xRow, float& zCol) const noexcept { - xRow = -(static_cast(iRow) + 0.5f) * PitchRow + WidthH; - zCol = (static_cast(iCol) + 0.5f) * PitchCol - LengthH; + xRow = -(row + 0.5f) * PitchRow + WidthH; + zCol = (col + 0.5f) * PitchCol - LengthH; } - bool detectorToLocal(int const row, int const col, math_utils::Point3D& loc) const noexcept + bool detectorToLocal(float const row, float const col, math_utils::Point3D& loc) const noexcept { float xRow{0.}, zCol{0.}; if (!detectorToLocal(row, col, xRow, zCol)) { return false; } - loc.SetCoordinates(xRow, NominalYShift, zCol); + loc.SetCoordinates(xRow, 0.0f, zCol); return true; } - void detectorToLocalUnchecked(int const row, int const col, math_utils::Point3D& loc) const noexcept + void detectorToLocalUnchecked(float const row, float const col, math_utils::Point3D& loc) const noexcept { float xRow{0.}, zCol{0.}; detectorToLocalUnchecked(row, col, xRow, zCol); - loc.SetCoordinates(xRow, NominalYShift, zCol); + loc.SetCoordinates(xRow, 0.0f, zCol); } private: + // Check local coordinates (cm) validity. 
template - [[nodiscard]] constexpr bool isValid(T const row, T const col) const noexcept + constexpr bool isValidLoc(T const x, T const z) const noexcept { - if constexpr (std::is_floating_point_v) { // compares in local coord. - return (-WidthH < row && row < WidthH && -LengthH < col && col < LengthH); - } else { // compares in rows/cols - return !static_cast(row < 0 || row >= static_cast(NRows) || col < 0 || col >= static_cast(NCols)); - } + return (-WidthH < x && x < WidthH && -LengthH < z && z < LengthH); + } + + // Check detector coordinates validity. + template + constexpr bool isValidDet(T const row, T const col) const noexcept + { + return (row >= 0 && row < static_cast(NRows) && + col >= 0 && col < static_cast(NCols)); } float mRadius; diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h index fedaad9182cce..83db7632e72f4 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h +++ b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h @@ -134,7 +134,6 @@ constexpr std::array radii{19.0006 * mm, 25.228 * mm, 31.4554 * constexpr std::array radiiInner{radii[0] - silicon::thicknessIn, radii[1] - silicon::thicknessIn, radii[2] - silicon::thicknessIn}; // inner silicon radius constexpr std::array radiiOuter{radii[0] + silicon::thicknessOut, radii[1] + silicon::thicknessOut, radii[2] + silicon::thicknessOut}; // outer silicon radius constexpr std::array radiiMiddle{(radiiInner[0] + radiiOuter[0]) / 2., (radiiInner[1] + radiiOuter[1]) / 2., (radiiInner[2] + radiiOuter[2]) / 2.}; // middle silicon radius -constexpr double nominalYShift{-metalstack::thickness / 2.}; // shift to position in silicion volume to the chip volume (silicon+metalstack) // extra information of pixels and their response functions namespace pixelarray::pixels diff --git a/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt b/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt index 
39e435f0ba2e6..cb6812445283c 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt @@ -22,7 +22,10 @@ its3_add_macro(CompareClusterSize.C) its3_add_macro(CheckMosaixSegment.C) its3_add_macro(CheckMosaixSegmentTrans.C) its3_add_macro(CompareClustersAndDigits.C) +its3_add_macro(CompareClustersAndDigitsOnChip.C) its3_add_macro(CheckROFs.C) its3_add_macro(CheckTileNumbering.C) its3_add_macro(CreateITS3StaticDeadMap.C) its3_add_macro(TestSensorGeometry.C) +its3_add_macro(CorrTracksClusters.C) +its3_add_macro(CheckChipResponseFile.C) diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckChipResponseFile.C b/Detectors/Upgrades/ITS3/macros/test/CheckChipResponseFile.C new file mode 100644 index 0000000000000..996a99d87ecbc --- /dev/null +++ b/Detectors/Upgrades/ITS3/macros/test/CheckChipResponseFile.C @@ -0,0 +1,192 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file CheckChipResponseFile.C +/// \brief Simple macro to check the chip response files + +#if !defined(__CLING__) || defined(__ROOTCLING__) +#include +#include +#include +#include +#include +#include +#include +#include + +#define ENABLE_UPGRADES +#include "ITSMFTSimulation/AlpideSimResponse.h" + +#include "ITS3Base/SegmentationMosaix.h" +#include "fairlogger/Logger.h" +#endif + +using SegmentationMosaix = o2::its3::SegmentationMosaix; + +double um2cm(double um) { return um * 1e-4; } +double cm2um(double cm) { return cm * 1e+4; } + +o2::itsmft::AlpideSimResponse *mAlpSimResp0 = nullptr, + *mAlpSimResp1 = nullptr, + *mAptSimResp1 = nullptr; + +o2::itsmft::AlpideSimResponse* loadResponse(const std::string& fileName, const std::string& respName) +{ + TFile* f = TFile::Open(fileName.data()); + if (!f) { + std::cerr << fileName << " not found" << std::endl; + return nullptr; + } + auto resp = (o2::itsmft::AlpideSimResponse*)f->Get(respName.data()); + if (!resp) + std::cerr << respName << " not found in " << fileName << std::endl; + return resp; +} + +void LoadRespFunc() +{ + std::string AptsFile = "$(O2_ROOT)/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/APTSResponseData.root"; + std::string AlpideFile = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; + + mAlpSimResp0 = loadResponse(AlpideFile, "response0"); // Vbb=0V + LOG(info) << "ALPIDE Vbb=0V response" << std::endl; + mAlpSimResp0->print(); + mAlpSimResp1 = loadResponse(AlpideFile, "response1"); // Vbb=-3V + LOG(info) << "ALPIDE Vbb=-3V response" << std::endl; + mAlpSimResp1->print(); + mAptSimResp1 = loadResponse(AptsFile, "response1"); // APTS + LOG(info) << "APTS response" << std::endl; + mAptSimResp1->print(); +} + +std::vector getCollectionSeediciencies(o2::itsmft::AlpideSimResponse* resp, + const std::vector& depths) +{ + std::vector seed; + bool flipRow = false, flipCol = false; + for (auto depth : depths) { + auto rspmat = resp->getResponse(0.0, 0.0, + 
um2cm(depth) + resp->getDepthMin() + 1.e-9, + flipRow, flipCol); + seed.push_back(rspmat ? rspmat->getValue(2, 2) : 0.f); + } + return seed; +} + +std::vector getShareValues(o2::itsmft::AlpideSimResponse* resp, + const std::vector& depths) +{ + std::vector share; + bool flipRow = false, flipCol = false; + for (auto depth : depths) { + auto rspmat = resp->getResponse(0.0, 0.0, + um2cm(depth) + resp->getDepthMin() + 1.e-9, + flipRow, flipCol); + float s = 0; + int npix = resp->getNPix(); + if (rspmat) { + for (int i = 0; i < npix; ++i) + for (int j = 0; j < npix; ++j) + if (!(i == npix / 2 && j == npix / 2)) + s += rspmat->getValue(i, j); + } + share.push_back(s); + } + return share; +} + +std::vector getEffValues(o2::itsmft::AlpideSimResponse* resp, + const std::vector& depths) +{ + std::vector all; + bool flipRow = false, flipCol = false; + for (auto depth : depths) { + auto rspmat = resp->getResponse(0.0, 0.0, + um2cm(depth) + resp->getDepthMin() + 1.e-9, + flipRow, flipCol); + float s = 0; + int npix = resp->getNPix(); + if (rspmat) { + for (int i = 0; i < npix; ++i) + for (int j = 0; j < npix; ++j) + s += rspmat->getValue(i, j); + } + all.push_back(s); + } + return all; +} + +void CheckChipResponseFile() +{ + LoadRespFunc(); + LOG(info) << "Response function loaded" << std::endl; + + std::vector vecDepth(50); + for (int i = 0; i < 50; ++i) + vecDepth[i] = i; + + int colors[] = {kOrange + 7, kRed + 1, kAzure + 4}; + struct RespInfo { + o2::itsmft::AlpideSimResponse* resp; + std::string title; + int color; + }; + std::vector responses = { + {mAptSimResp1, "APTS", colors[0]}, + {mAlpSimResp0, "ALPIDE Vbb=0V", colors[1]}, + {mAlpSimResp1, "ALPIDE Vbb=-3V", colors[2]}}; + + TCanvas* c1 = new TCanvas("c1", "c1", 800, 600); + TH1* frame = c1->DrawFrame(-1, -0.049, 50, 1.049); + frame->SetTitle(";Depth(um);Charge Collection Seed / Share / Eff"); + TLegend* leg = new TLegend(0.15, 0.5, 0.4, 0.85); + leg->SetFillStyle(0); + leg->SetBorderSize(0); + + for (auto& r : 
responses) { + if (!r.resp) + continue; + auto seed = getCollectionSeediciencies(r.resp, vecDepth); + auto shr = getShareValues(r.resp, vecDepth); + auto all = getEffValues(r.resp, vecDepth); + + TGraph* grSeed = new TGraph(vecDepth.size(), vecDepth.data(), seed.data()); + grSeed->SetTitle(Form("%s seed", r.title.c_str())); + grSeed->SetLineColor(r.color); + grSeed->SetLineWidth(2); + grSeed->SetMarkerColor(r.color); + grSeed->SetMarkerStyle(kFullCircle); + grSeed->SetMarkerSize(0.8); + grSeed->Draw("SAME LP"); + leg->AddEntry(grSeed, Form("%s seed", r.title.c_str()), "lp"); + + TGraph* grShare = new TGraph(vecDepth.size(), vecDepth.data(), shr.data()); + grShare->SetLineColor(r.color); + grShare->SetLineWidth(2); + grShare->SetMarkerColor(r.color); + grShare->SetMarkerStyle(kOpenSquare); + grShare->SetMarkerSize(1); + grShare->Draw("SAME LP"); + leg->AddEntry(grShare, Form("%s share", r.title.c_str()), "p"); + + TGraph* grEff = new TGraph(vecDepth.size(), vecDepth.data(), all.data()); + grEff->SetLineColor(r.color); + grEff->SetLineWidth(2); + grEff->SetMarkerColor(r.color); + grEff->SetMarkerStyle(kFullDiamond); + grEff->SetMarkerSize(1); + grEff->Draw("SAME LP"); + leg->AddEntry(grEff, Form("%s eff", r.title.c_str()), "p"); + } + leg->Draw(); + + c1->SaveAs("ChipResponse.pdf"); +} diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C index 1dc4a4e2d6b47..240b1bd344af5 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C @@ -80,8 +80,6 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil int nevD = digTree->GetEntries(); // digits in cont. 
readout may be grouped as few events per entry - int lastReadHitEv = -1; - int nDigitReadIB{0}, nDigitReadOB{0}; int nDigitFilledIB{0}, nDigitFilledOB{0}; diff --git a/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigitsOnChip.C b/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigitsOnChip.C new file mode 100644 index 0000000000000..310be8c5858ef --- /dev/null +++ b/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigitsOnChip.C @@ -0,0 +1,579 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file CompareClustersAndDigitsOnChip.C +/// \brief Macro to compare ITS3 clusters and digits on a pixel array, + +#if !defined(__CLING__) || defined(__ROOTCLING__) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#define ENABLE_UPGRADES +#include "DataFormatsITSMFT/CompCluster.h" +#include "DataFormatsITSMFT/Digit.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "DetectorsCommonDataFormats/DetID.h" +#include "DetectorsCommonDataFormats/DetectorNameConf.h" +#include "ITS3Base/SegmentationMosaix.h" +#include "ITS3Base/SpecsV2.h" +#include "ITS3Reconstruction/TopologyDictionary.h" +#include "DataFormatsITSMFT/CompCluster.h" +#include "DataFormatsITSMFT/ClusterTopology.h" +#include "ITSBase/GeometryTGeo.h" +#include "ITSMFTBase/SegmentationAlpide.h" +#include "ITSMFTSimulation/Hit.h" +#include "MathUtils/Cartesian.h" +#include "MathUtils/Utils.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include "SimulationDataFormat/MCTruthContainer.h" +#include "SimulationDataFormat/ConstMCTruthContainer.h" +#include "SimulationDataFormat/IOMCTruthContainerView.h" + +struct Data { + TH2F* pixelArray; + TGraph* hitS; + TGraph* hitM; + TGraph* hitE; + TGraph* clusS; + TGraph* cog; + TLegend* leg; + std::vector* vClusBox; + void clear() + { + delete pixelArray; + delete hitS; + delete hitM; + delete hitE; + delete clusS; + delete cog; + delete leg; + for (auto& b : *vClusBox) { + delete b; + } + delete vClusBox; + } +}; + +void CompareClustersAndDigitsOnChip(std::string clusfile = "o2clus_its.root", + std::string digifile = "it3digits.root", + std::string dictfile = "", + std::string hitfile = "o2sim_HitsIT3.root", + std::string inputGeom = "o2sim_geometry.root", + bool batch = true) +{ + TH1::AddDirectory(kFALSE); + gROOT->SetBatch(batch); + gStyle->SetPalette(kRainBow); + gStyle->SetOptStat(0); + + using namespace o2::base; + using namespace 
o2::its; + using o2::itsmft::Hit; + using Segmentation = o2::itsmft::SegmentationAlpide; + using o2::itsmft::ClusterTopology; + using o2::itsmft::CompClusterExt; + using ROFRec = o2::itsmft::ROFRecord; + using MC2ROF = o2::itsmft::MC2ROFRecord; + using HitVec = std::vector; + using MC2HITS_map = std::unordered_map; // maps (track_ID<<16 + chip_ID) to entry in the hit vector + std::vector hitVecPool; + std::vector mc2hitVec; + + std::array mMosaixSegmentations{0, 1, 2}; + + // Geometry + o2::base::GeometryManager::loadGeometry(inputGeom); + auto gman = o2::its::GeometryTGeo::Instance(); + gman->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, + o2::math_utils::TransformType::T2GRot, + o2::math_utils::TransformType::L2G)); // request cached transforms + const int nChips = gman->getNumberOfChips(); + + LOGP(info, "Total number of chips is {} in ITS3 (IB and OB)", nChips); + + // Create all plots + LOGP(info, "Selecting chips to be visualised"); + std::set selectedChips; + std::map> chipGroups; + + for (int chipID{0}; chipID < nChips; ++chipID) { + TString tpath = gman->getMatrixPath(chipID); + std::string path = tpath.Data(); + + std::vector tokens; + std::istringstream iss(path); + std::string token; + while (std::getline(iss, token, '/')) { + if (!token.empty()) { + tokens.push_back(token); + } + } + + std::string segmentName, staveName, carbonFormName; + for (const auto& t : tokens) { + if (t.find("ITS3Segment") != std::string::npos) + segmentName = t; + if (t.find("ITSUStave") != std::string::npos) + staveName = t; + if (t.find("ITS3CarbonForm") != std::string::npos) + carbonFormName = t; + } + + std::string groupKey; + if (!segmentName.empty()) { + groupKey = segmentName + "_" + carbonFormName; + } else if (!staveName.empty()) { + groupKey = staveName; + } else { + continue; + } + + chipGroups[groupKey].push_back(chipID); + } + + LOGP(info, "From each IB Segment or OB Stave, 10 chipIDs are uniformly selected"); + LOGP(info, "Selected 
chipID: "); + for (auto& [groupName, ids] : chipGroups) { + std::vector sampled; + if (ids.size() <= 10) { + for (auto id : ids) { + selectedChips.insert(id); + sampled.push_back(id); + } + } else { + for (int i{0}; i < 10; ++i) { + int idx = i * (ids.size() - 1) / 9; // 9 intervals for 10 points + int id = ids[idx]; + if (selectedChips.insert(id).second) { + sampled.push_back(id); + } + } + } + + std::ostringstream oss; + std::string topOrBot = "N/A"; + std::smatch match; + std::regex rgxSegment(R"(Segment(\d+)_(\d+)_ITS3CarbonForm\d+_(\d+))"); + std::regex rgxStave(R"(Stave(\d+)_(\d+))"); + if (std::regex_search(groupName, match, rgxSegment)) { + int layer = std::stoi(match[1]); + int segment = std::stoi(match[2]); + int carbonForm = std::stoi(match[3]); + topOrBot = (carbonForm == 0 ? "TOP" : "BOT"); + oss << topOrBot << " segment " << segment << " at layer " << layer << ": "; + } else if (std::regex_search(groupName, match, rgxStave)) { + int layer = std::stoi(match[1]); + int stave = std::stoi(match[2]); + oss << "Stave " << stave << " at layer " << layer << ": "; + } else { + LOGP(error, "Cannot select the correct chipID in OB or IB"); + return; + } + for (auto id : sampled) { + oss << id << " "; + } + LOG(info) << oss.str(); + } + LOGP(info, "{} selected chips will be visualized and analyzed.", chipGroups.size()); + + // Hits + TFile fileH(hitfile.data()); + auto* hitTree = dynamic_cast(fileH.Get("o2sim")); + std::vector* hitArray = nullptr; + hitTree->SetBranchAddress("IT3Hit", &hitArray); + mc2hitVec.resize(hitTree->GetEntries()); + hitVecPool.resize(hitTree->GetEntries(), nullptr); + + // Digits + TFile* digFile = TFile::Open(digifile.data()); + TTree* digTree = (TTree*)digFile->Get("o2sim"); + std::vector* digArr = nullptr; + digTree->SetBranchAddress("IT3Digit", &digArr); + o2::dataformats::IOMCTruthContainerView* plabels = nullptr; + digTree->SetBranchAddress("IT3DigitMCTruth", &plabels); + + // Clusters + TFile fileC(clusfile.data()); + auto* clusTree 
= dynamic_cast(fileC.Get("o2sim")); + std::vector* clusArr = nullptr; + clusTree->SetBranchAddress("ITSClusterComp", &clusArr); + std::vector* patternsPtr = nullptr; + auto pattBranch = clusTree->GetBranch("ITSClusterPatt"); + if (pattBranch != nullptr) { + pattBranch->SetAddress(&patternsPtr); + } + + // Topology dictionary + o2::its3::TopologyDictionary dict; + bool hasAvailableDict = false; + if (!dictfile.empty()) { + std::ifstream file(dictfile.c_str()); + if (file.good()) { + LOGP(info, "Running with external topology dictionary: {}", dictfile); + dict.readFromFile(dictfile); + LOGP(info, "The IB dictionary size is {}, and the OB dictionary size is {}", dict.getSize(true), dict.getSize(false)); + hasAvailableDict = dict.getSize(true) != 0 && dict.getSize(false) != 0; + if (hasAvailableDict) { + LOGP(info, "Dictionaries is vaild."); + } else { + LOGP(info, "Dictionaries is NOT vaild!"); + } + } else { + LOGP(info, "Cannot open dictionary file: {}. Running without external dictionary!", dictfile); + dictfile = ""; + } + } else { + LOGP(info, "Running without external topology dictionary!"); + } + + // ROFrecords + std::vector rofRecVec, *rofRecVecP = &rofRecVec; + clusTree->SetBranchAddress("ITSClustersROF", &rofRecVecP); + + // Cluster MC labels + o2::dataformats::MCTruthContainer* clusLabArr = nullptr; + std::vector mc2rofVec, *mc2rofVecP = &mc2rofVec; + if ((hitTree != nullptr) && (clusTree->GetBranch("ITSClusterMCTruth") != nullptr)) { + clusTree->SetBranchAddress("ITSClusterMCTruth", &clusLabArr); + clusTree->SetBranchAddress("ITSClustersMC2ROF", &mc2rofVecP); + } + + clusTree->GetEntry(0); + unsigned int nROFRec = (int)rofRecVec.size(); + std::vector mcEvMin(nROFRec, hitTree->GetEntries()); + std::vector mcEvMax(nROFRec, -1); + + // Build min and max MC events used by each ROF + for (int imc = mc2rofVec.size(); imc--;) { + const auto& mc2rof = mc2rofVec[imc]; + if (mc2rof.rofRecordID < 0) { + continue; // this MC event did not contribute to any ROF + } + 
for (unsigned int irfd = mc2rof.maxROF - mc2rof.minROF + 1; irfd--;) { + unsigned int irof = mc2rof.rofRecordID + irfd; + if (irof >= nROFRec) { + LOGP(error, "ROF = {} from MC2ROF record is >= N ROFs = {}", irof, nROFRec); + } + if (mcEvMin[irof] > imc) { + mcEvMin[irof] = imc; + } + if (mcEvMax[irof] < imc) { + mcEvMax[irof] = imc; + } + } + } + + // Create all plots + LOGP(info, "Creating plots"); + std::unordered_map data; + auto initData = [&](int chipID, Data& dat) { + if (dat.pixelArray) + return; + + int nCol{0}, nRow{0}; + float lengthPixArr{0}, widthPixArr{0}; + bool isIB = o2::its3::constants::detID::isDetITS3(chipID); + int layer = gman->getLayer(chipID); + if (isIB) { + nCol = o2::its3::SegmentationMosaix::NCols; + nRow = o2::its3::SegmentationMosaix::NRows; + lengthPixArr = o2::its3::constants::pixelarray::pixels::mosaix::pitchZ * nCol; + widthPixArr = o2::its3::constants::pixelarray::pixels::mosaix::pitchX * nRow; + } else { + nCol = o2::itsmft::SegmentationAlpide::NCols; + nRow = o2::itsmft::SegmentationAlpide::NRows; + lengthPixArr = o2::itsmft::SegmentationAlpide::PitchCol * nCol; + widthPixArr = o2::itsmft::SegmentationAlpide::PitchRow * nRow; + } + + dat.pixelArray = new TH2F(Form("histSensor_%d", chipID), Form("SensorID=%d;z(cm);x(cm)", chipID), + nCol, -0.5 * lengthPixArr, 0.5 * lengthPixArr, + nRow, -0.5 * widthPixArr, 0.5 * widthPixArr); + dat.hitS = new TGraph(); + dat.hitS->SetMarkerStyle(kFullTriangleDown); + dat.hitS->SetMarkerColor(kGreen); + dat.hitM = new TGraph(); + dat.hitM->SetMarkerStyle(kFullCircle); + dat.hitM->SetMarkerColor(kGreen + 3); + dat.hitE = new TGraph(); + dat.hitE->SetMarkerStyle(kFullTriangleUp); + dat.hitE->SetMarkerColor(kGreen + 5); + dat.clusS = new TGraph(); + dat.clusS->SetMarkerStyle(kFullSquare); + dat.clusS->SetMarkerColor(kBlue); + dat.cog = new TGraph(); + dat.cog->SetMarkerStyle(kFullDiamond); + dat.cog->SetMarkerColor(kRed); + dat.leg = new TLegend(0.7, 0.7, 0.92, 0.92); + dat.leg->AddEntry(dat.hitS, 
"Hit Start"); + dat.leg->AddEntry(dat.hitM, "Hit Middle"); + dat.leg->AddEntry(dat.hitE, "Hit End"); + dat.leg->AddEntry(dat.clusS, "Cluster Start"); + dat.leg->AddEntry(dat.cog, "Cluster COG"); + dat.vClusBox = new std::vector; + }; + + LOGP(info, "Filling digits"); + for (int iDigit{0}; digTree->LoadTree(iDigit) >= 0; ++iDigit) { + digTree->GetEntry(iDigit); + for (const auto& digit : *digArr) { + const auto chipID = digit.getChipIndex(); + if (!selectedChips.count(chipID)) + continue; + const auto layer = gman->getLayer(chipID); + bool isIB = layer < 3; + float locDigiX{0}, locDigiZ{0}; + if (isIB) { + mMosaixSegmentations[layer].detectorToLocal(digit.getRow(), digit.getColumn(), locDigiX, locDigiZ); + } else { + o2::itsmft::SegmentationAlpide::detectorToLocal(digit.getRow(), digit.getColumn(), locDigiX, locDigiZ); + } + auto& dat = data[chipID]; + initData(chipID, dat); + data[chipID].pixelArray->Fill(locDigiZ, locDigiX); + } + } + + LOGP(info, "Building min and max MC events used by each ROF, total ROFs {}", nROFRec); + auto pattIt = patternsPtr->cbegin(); + bool isAllPattIDInvaild{true}; + for (unsigned int irof{0}; irof < nROFRec; irof++) { + const auto& rofRec = rofRecVec[irof]; + // >> read and map MC events contributing to this ROF + for (int im = mcEvMin[irof]; im <= mcEvMax[irof]; im++) { + if (hitVecPool[im] == nullptr) { + hitTree->SetBranchAddress("IT3Hit", &hitVecPool[im]); + hitTree->GetEntry(im); + auto& mc2hit = mc2hitVec[im]; + const auto* hitArray = hitVecPool[im]; + for (int ih = hitArray->size(); ih--;) { + const auto& hit = (*hitArray)[ih]; + uint64_t key = (uint64_t(hit.GetTrackID()) << 32) + hit.GetDetectorID(); + mc2hit.emplace(key, ih); + } + } + } + + // Clusters in this ROF + for (int icl{0}; icl < rofRec.getNEntries(); icl++) { + int clEntry = rofRec.getFirstEntry() + icl; // entry of icl-th cluster of this ROF in the vector of clusters + const auto& cluster = (*clusArr)[clEntry]; + const auto chipID = cluster.getSensorID(); + if 
(!selectedChips.count(chipID)) { + // Even if not selected, advance pattIt if patternID is InvalidPatternID + if (cluster.getPatternID() == o2::itsmft::CompCluster::InvalidPatternID) { + o2::itsmft::ClusterPattern::skipPattern(pattIt); + } + continue; + } + const auto pattID = cluster.getPatternID(); + const bool isIB = o2::its3::constants::detID::isDetITS3(chipID); + const auto layer = gman->getLayer(chipID); + auto& dat = data[chipID]; + initData(chipID, dat); + o2::itsmft::ClusterPattern pattern; + // Pattern extraction + if (cluster.getPatternID() != o2::itsmft::CompCluster::InvalidPatternID) { + isAllPattIDInvaild = false; + if (!hasAvailableDict) { + LOGP(error, "Encountered pattern ID {}, which is not equal to the invalid pattern ID {}", cluster.getPatternID(), o2::itsmft::CompCluster::InvalidPatternID); + LOGP(error, "Clusters have already been generated with a dictionary which was not provided properly!"); + return; + } + if (dict.isGroup(cluster.getPatternID(), isIB)) { + pattern.acquirePattern(pattIt); + } else { + pattern = dict.getPattern(cluster.getPatternID(), isIB); + } + } else { + pattern.acquirePattern(pattIt); + } + + // Hits + const auto& lab = (clusLabArr->getLabels(clEntry))[0]; + if (!lab.isValid()) + continue; + const int trID = lab.getTrackID(); + const auto& mc2hit = mc2hitVec[lab.getEventID()]; + const auto* hitArray = hitVecPool[lab.getEventID()]; + uint64_t key = (uint64_t(trID) << 32) + chipID; + auto hitEntry = mc2hit.find(key); + if (hitEntry == mc2hit.end()) + continue; + o2::math_utils::Point3D locHMiddle; + const auto& hit = (*hitArray)[hitEntry->second]; + auto locHEnd = gman->getMatrixL2G(chipID) ^ (hit.GetPos()); + auto locHStart = gman->getMatrixL2G(chipID) ^ (hit.GetPosStart()); + if (isIB) { + float xFlat{0.}, yFlat{0.}; + mMosaixSegmentations[layer].curvedToFlat(locHEnd.X(), locHEnd.Y(), xFlat, yFlat); + locHEnd.SetXYZ(xFlat, yFlat, locHEnd.Z()); + mMosaixSegmentations[layer].curvedToFlat(locHStart.X(), locHStart.Y(), 
xFlat, yFlat); + locHStart.SetXYZ(xFlat, yFlat, locHStart.Z()); + } + locHMiddle.SetXYZ(0.5f * (locHEnd.X() + locHStart.X()), + 0.5f * (locHEnd.Y() + locHStart.Y()), + 0.5f * (locHEnd.Z() + locHStart.Z())); + data[chipID].hitS->AddPoint(locHStart.Z(), locHStart.X()); + data[chipID].hitM->AddPoint(locHMiddle.Z(), locHMiddle.X()); + data[chipID].hitE->AddPoint(locHEnd.Z(), locHEnd.X()); + + // Cluster Start point + float locCluX{0}, locCluZ{0}; + if (isIB) { + mMosaixSegmentations[layer].detectorToLocal(cluster.getRow(), cluster.getCol(), locCluX, locCluZ); + } else { + o2::itsmft::SegmentationAlpide::detectorToLocal(cluster.getRow(), cluster.getCol(), locCluX, locCluZ); + } + data[chipID].clusS->AddPoint(locCluZ, locCluX); + + // COG + o2::math_utils::Point3D locCOG; + // Cluster COG using dictionary (if available) + if (hasAvailableDict && (pattID != o2::itsmft::CompCluster::InvalidPatternID && !dict.isGroup(pattID, isIB))) { + locCOG = dict.getClusterCoordinates(cluster); + } else { + if (isIB) { + locCOG = o2::its3::TopologyDictionary::getClusterCoordinates(cluster, pattern, false); + } else { + locCOG = o2::itsmft::TopologyDictionary::getClusterCoordinates(cluster, pattern, false); + } + } + if (isIB) { + float flatX{0}, flatY{0}; + mMosaixSegmentations[layer].curvedToFlat(locCOG.X(), locCOG.Y(), flatX, flatY); + locCOG.SetCoordinates(flatX, flatY, locCOG.Z()); + } + data[chipID].cog->AddPoint(locCOG.Z(), locCOG.X()); + + // Cluster Box using dictionary if available, otherwise use raw pattern + float lowLeftX{0}, lowLeftZ{0}, topRightX{0}, topRightZ{0}; + // Use dictionary-based cluster box + if (isIB) { + mMosaixSegmentations[layer].detectorToLocal(cluster.getRow(), cluster.getCol(), lowLeftX, lowLeftZ); + mMosaixSegmentations[layer].detectorToLocal(cluster.getRow() + pattern.getRowSpan() - 1, + cluster.getCol() + pattern.getColumnSpan() - 1, + topRightX, topRightZ); + lowLeftX += 0.5 * o2::its3::constants::pixelarray::pixels::mosaix::pitchX; + lowLeftZ -= 0.5 
* o2::its3::constants::pixelarray::pixels::mosaix::pitchZ; + topRightX -= 0.5 * o2::its3::constants::pixelarray::pixels::mosaix::pitchX; + topRightZ += 0.5 * o2::its3::constants::pixelarray::pixels::mosaix::pitchZ; + } else { + o2::itsmft::SegmentationAlpide::detectorToLocal(cluster.getRow(), cluster.getCol(), lowLeftX, lowLeftZ); + o2::itsmft::SegmentationAlpide::detectorToLocal(cluster.getRow() + pattern.getRowSpan() - 1, + cluster.getCol() + pattern.getColumnSpan() - 1, + topRightX, topRightZ); + lowLeftX += 0.5 * o2::itsmft::SegmentationAlpide::PitchRow; + lowLeftZ -= 0.5 * o2::itsmft::SegmentationAlpide::PitchCol; + topRightX -= 0.5 * o2::itsmft::SegmentationAlpide::PitchRow; + topRightZ += 0.5 * o2::itsmft::SegmentationAlpide::PitchCol; + } + auto clusBox = new TBox(lowLeftZ, lowLeftX, topRightZ, topRightX); + clusBox->SetFillColorAlpha(0, 0); + clusBox->SetFillStyle(0); + clusBox->SetLineWidth(4); + clusBox->SetLineColor(kBlack); + data[chipID].vClusBox->push_back(clusBox); + } + } + + if (isAllPattIDInvaild) { + LOGP(info, "Verified input cluster file was generated w/o topology dictionary"); + if (!dictfile.empty()) { + LOGP(error, "Non-dictionary cluster file processed by external dictionary! 
Please adjust input."); + return; + } + } + + LOGP(info, "Writing to root file"); + double x1, y1, x2, y2; + auto oFileOut = TFile::Open("CompareClustersAndDigitsOnChip.root", "RECREATE"); + oFileOut->cd(); + for (int chipID{0}; chipID < nChips; chipID++) { + if (!selectedChips.count(chipID)) + continue; + auto& dat = data[chipID]; + TString tpath = gman->getMatrixPath(chipID); + const std::string cpath{tpath.Data() + 39, tpath.Data() + tpath.Length()}; + const std::filesystem::path p{cpath}; + std::string nestedDir = p.parent_path().string(); + TDirectory* currentDir = oFileOut; + std::istringstream iss(nestedDir); + std::string token; + while (std::getline(iss, token, '/')) { + if (token.empty()) + continue; + TDirectory* nextDir = currentDir->GetDirectory(token.c_str()); + if (!nextDir) { + nextDir = currentDir->mkdir(token.c_str()); + } + if (!nextDir) { + LOGP(error, "Cannot create subdirectory: %s", token.c_str()); + break; + } + currentDir = nextDir; + currentDir->cd(); + } + if (!currentDir) { + LOGP(error, "Failed to create nested directory for chip %d", chipID); + continue; + } + + auto canv = new TCanvas(Form("%s_%d", p.filename().c_str(), chipID)); + canv->SetTitle(Form("%s_%d", p.filename().c_str(), chipID)); + canv->cd(); + gPad->SetGrid(1, 1); + dat.pixelArray->Draw("colz"); + dat.hitS->Draw("p;same"); + dat.hitM->Draw("p;same"); + dat.hitE->Draw("p;same"); + auto arr = new TArrow(); + arr->SetArrowSize(0.01); + for (int i{0}; i < dat.hitS->GetN(); ++i) { + dat.hitS->GetPoint(i, x1, y1); + dat.hitE->GetPoint(i, x2, y2); + arr->DrawArrow(x1, y1, x2, y2); + } + dat.clusS->Draw("p;same"); + if (dat.cog->GetN() != 0) + dat.cog->Draw("p;same"); + for (const auto& clusBox : *dat.vClusBox) { + clusBox->Draw(); + } + dat.leg->Draw(); + canv->SetEditable(false); + + currentDir->WriteTObject(canv, canv->GetName()); + dat.clear(); + delete canv; + delete arr; + printf("\rWriting chip %05d", chipID); + } + printf("\n"); + oFileOut->Write(); + oFileOut->Close(); 
+ LOGP(info, "Finished writing selected chip visualizations."); +} \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/macros/test/CorrTracksClusters.C b/Detectors/Upgrades/ITS3/macros/test/CorrTracksClusters.C new file mode 100644 index 0000000000000..634d761366920 --- /dev/null +++ b/Detectors/Upgrades/ITS3/macros/test/CorrTracksClusters.C @@ -0,0 +1,638 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#if !defined(__CLING__) || defined(__ROOTCLING__) +#include +#include "TEfficiency.h" +#include +#include +#include + +#include "ITSMFTSimulation/Hit.h" +#include "DataFormatsITS/TrackITS.h" +#include "DetectorsBase/Propagator.h" +#include "Field/MagneticField.h" +#include "ITSBase/GeometryTGeo.h" +#include "DataFormatsITSMFT/CompCluster.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include "SimulationDataFormat/MCEventHeader.h" +#include "SimulationDataFormat/MCTrack.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "SimulationDataFormat/MCTruthContainer.h" +#include "SimulationDataFormat/TrackReference.h" +#include "ITS3Reconstruction/TopologyDictionary.h" +#include "ITSMFTBase/SegmentationAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" + +#include +#include +#include +#include +#endif + +using namespace std; +using namespace o2::itsmft; +using namespace o2::its; +using SegmentationIB = o2::its3::SegmentationMosaix; +using SegmentationOB = o2::itsmft::SegmentationAlpide; +static constexpr int kNLayer = 7; +static constexpr int 
INVALID_INT = -99; +static constexpr float INVALID_FLOAT = -99.f; + +//______________________________________________________________________________ +// ParticleInfo structure +struct ParticleInfo { + int event{}; + int pdg{}; + float pt{}; + float recpt{}; + float eta{}; + float phi{}; + float pvx{}; + float pvy{}; + float pvz{}; + float dcaxy{}; + float dcaz{}; + int mother{}; + int first{}; + unsigned short clusters = 0u; + unsigned char isReco = 0u; + unsigned char isFake = 0u; + bool isPrimary = false; + unsigned char storedStatus = 2; /// not stored = 2, fake = 1, good = 0 + std::array clusterSize; + std::array clusterPattern; + std::array clusterLocX; + std::array clusterLocZ; + std::array hitLocX; + std::array hitLocY; + std::array hitLocZ; + o2::its::TrackITS track; + ParticleInfo() + { + clusterSize.fill(INVALID_INT); + clusterPattern.fill(INVALID_INT); + clusterLocX.fill(INVALID_FLOAT); + clusterLocZ.fill(INVALID_FLOAT); + hitLocX.fill(INVALID_FLOAT); + hitLocY.fill(INVALID_FLOAT); + hitLocZ.fill(INVALID_FLOAT); + } +}; + +//______________________________________________________________________________ +// Convert curved local coordinates to flat coordinates +void CurvedLocalToFlat(o2::math_utils::Point3D& point, const SegmentationIB& seg) +{ + float xFlat = 0.f, yFlat = 0.f; + seg.curvedToFlat(point.X(), point.Y(), xFlat, yFlat); + point.SetXYZ(xFlat, yFlat, point.Z()); +} + +//______________________________________________________________________________ +// Resolve pattern from patternID and iterator +bool resolvePattern(const o2::itsmft::CompClusterExt& cluster, + decltype(std::declval>().cbegin())& pattIt, + const o2::its3::TopologyDictionary& dict, + bool isIB, + o2::itsmft::ClusterPattern& pattOut) +{ + auto pattID = cluster.getPatternID(); + if (pattID != o2::itsmft::CompCluster::InvalidPatternID) { + if (!dict.getSize(true) && !dict.getSize(false)) { + LOGP(error, "Encountered non-invalid pattern ID {} but dictionary is missing!", pattID); + 
return false; + } + if (dict.isGroup(pattID, isIB)) { + pattOut.acquirePattern(pattIt); + } else { + pattOut = dict.getPattern(pattID, isIB); + } + } else { + pattOut.acquirePattern(pattIt); + } + return true; +} + +//______________________________________________________________________________ +// Function to analyze reconstructed tracks +void analyzeRecoTracks(TTree* recTree, + const std::vector* recArr, + const std::vector* trkLabArr, + std::vector>& info, + float bz, + ULong_t& unaccounted, + ULong_t& good, + ULong_t& fakes, + ULong_t& total) +{ + unaccounted = good = fakes = total = 0; + for (int frame = 0; frame < recTree->GetEntriesFast(); frame++) { // reco tracks frames + if (recTree->GetEvent(frame) == 0) + continue; + total += trkLabArr->size(); + for (unsigned int iTrack = 0; iTrack < trkLabArr->size(); ++iTrack) { + auto lab = trkLabArr->at(iTrack); + if (!lab.isSet()) { + unaccounted++; + continue; + } + int trackID, evID, srcID; + bool fake; + lab.get(trackID, evID, srcID, fake); + if (evID < 0 || evID >= (int)info.size()) { + unaccounted++; + continue; + } + if (trackID < 0 || trackID >= (int)info[evID].size()) { + unaccounted++; + continue; + } + info[evID][trackID].isReco += !fake; + info[evID][trackID].isFake += fake; + if (recArr->at(iTrack).isBetter(info[evID][trackID].track, 1.e9)) { + info[evID][trackID].storedStatus = fake; + info[evID][trackID].track = recArr->at(iTrack); + float ip[2]{0., 0.}; + info[evID][trackID].track.getImpactParams(info[evID][trackID].pvx, + info[evID][trackID].pvy, + info[evID][trackID].pvz, bz, ip); + info[evID][trackID].dcaxy = ip[0]; + info[evID][trackID].dcaz = ip[1]; + info[evID][trackID].recpt = info[evID][trackID].track.getPt(); + } + fakes += static_cast(fake); + good += static_cast(!fake); + } + } + LOGP(info, "** Some statistics:"); + LOGP(info, "\t- Total number of tracks: {}", total); + LOGP(info, "\t- Total number of tracks not corresponding to particles: {} ({:.2f}%)", unaccounted, unaccounted * 100. 
/ total); + LOGP(info, "\t- Total number of fakes: {} ({:.2f}%)", fakes, fakes * 100. / total); + LOGP(info, "\t- Total number of good: {} ({:.2f}%)", good, good * 100. / total); +} + +//______________________________________________________________________________ +// Read and map hit information from hitTree +void mapHitsForMCEvents(TTree* hitTree, + std::vector*>& hitVecPool, + std::vector>& mc2hitVec, + const std::vector& mcEvMin, + const std::vector& mcEvMax, + size_t nROFRec) +{ + for (unsigned int irof = 0; irof < nROFRec; irof++) { + for (int im = mcEvMin[irof]; im <= mcEvMax[irof]; im++) { + if (!hitVecPool[im]) { + hitTree->SetBranchAddress("IT3Hit", &hitVecPool[im]); + hitTree->GetEntry(im); + auto& mc2hit = mc2hitVec[im]; + const auto* hitArray = hitVecPool[im]; + for (int ih = hitArray->size(); ih--;) { + const auto& hit = (*hitArray)[ih]; + uint64_t key = (uint64_t(hit.GetTrackID()) << 32) + hit.GetDetectorID(); + mc2hit.emplace(key, ih); + } + } + } + } +} + +//______________________________________________________________________________ +// Load geometry and magnetic field information +void loadGeometryAndField(const std::string& magfile, const std::string& inputGeom, float& bz, o2::its::GeometryTGeo*& gman) +{ + o2::base::Propagator::initFieldFromGRP(magfile); + bz = o2::base::Propagator::Instance()->getNominalBz(); + o2::base::GeometryManager::loadGeometry(inputGeom); + gman = o2::its::GeometryTGeo::Instance(); + gman->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, + o2::math_utils::TransformType::T2GRot, + o2::math_utils::TransformType::L2G)); +} + +//______________________________________________________________________________ +// Load topology dictionary +void loadTopologyDictionary(const std::string& dictfile, o2::its3::TopologyDictionary& dict) +{ + std::ifstream iofile(dictfile); + if (iofile.good()) { + LOG(info) << "Running with dictionary: " << dictfile; + dict.readFromFile(dictfile); + } else { + LOG(info) 
<< "Dictionary file not found: " << dictfile; + } +} + +//______________________________________________________________________________ +// Build ROF +void buildMcEvRangePerROF(const std::vector& mc2rofVec, + size_t nROFRec, + std::vector& mcEvMin, + std::vector& mcEvMax) +{ + for (size_t imc = 0; imc < mc2rofVec.size(); ++imc) { + const auto& mc2rof = mc2rofVec[imc]; + if (mc2rof.rofRecordID < 0) + continue; + for (size_t i = mc2rof.minROF; i <= mc2rof.maxROF; ++i) { + if (i >= nROFRec) + continue; + mcEvMin[i] = std::min(mcEvMin[i], static_cast(imc)); + mcEvMax[i] = std::max(mcEvMax[i], static_cast(imc)); + } + } +} + +//______________________________________________________________________________ +// Load Hits data +void prepareHitAccess(const std::string& hitfile, + TTree*& hitTree, + std::vector*>& hitVecPool, + std::vector>& mc2hitVec) +{ + TFile* fHit = TFile::Open(hitfile.data()); + hitTree = (TTree*)fHit->Get("o2sim"); + mc2hitVec.resize(hitTree->GetEntries()); + hitVecPool.resize(hitTree->GetEntries(), nullptr); +} + +void loadCluster(const std::string& clusfile, + TTree*& clusTree, + std::vector*& clusArr, + o2::dataformats::MCTruthContainer*& clusLabArr, + std::vector& mc2rofVec, + std::vector*& patternsPtr, + std::vector& rofRecVec) +{ + // Open file and let it persist + TFile* fileC = TFile::Open(clusfile.data()); + // Get tree + clusTree = dynamic_cast(fileC->Get("o2sim")); + // Cluster array + clusArr = nullptr; + clusTree->SetBranchAddress("ITSClusterComp", &clusArr); + // MC truth + clusLabArr = nullptr; + clusTree->SetBranchAddress("ITSClusterMCTruth", &clusLabArr); + clusTree->SetBranchAddress("ITSClusterPatt", &patternsPtr); + // ROF records + std::vector* rofRecVecP = &rofRecVec; + clusTree->SetBranchAddress("ITSClustersROF", &rofRecVecP); + // MC2ROF mapping + std::vector* mc2rofVecP = &mc2rofVec; + clusTree->SetBranchAddress("ITSClustersMC2ROF", &mc2rofVecP); + clusTree->GetEntry(0); + // After setting all branch addresses, trigger preload 
of the first entr +} + +//______________________________________________________________________________ +// Load Reconstructed Tracks data +void loadRecoTracks(const std::string& tracfile, + TTree*& recTree, + std::vector*& recArr, + std::vector*& trkLabArr) +{ + TFile* fTrk = TFile::Open(tracfile.data()); + recTree = (TTree*)fTrk->Get("o2sim"); + recTree->SetBranchAddress("ITSTrack", &recArr); + recTree->SetBranchAddress("ITSTrackMCTruth", &trkLabArr); +} + +//______________________________________________________________________________ +// Load MC Track information +void loadMCTrackInfo(const std::string& kinefile, + std::vector>& info, + std::vector*& mcArr, + o2::dataformats::MCEventHeader*& mcEvent, + TTree*& mcTree) +{ + TFile* kineFile = TFile::Open(kinefile.data()); + mcTree = (TTree*)kineFile->Get("o2sim"); + mcTree->SetBranchStatus("*", 0); + mcTree->SetBranchStatus("MCTrack*", 1); + mcTree->SetBranchStatus("MCEventHeader*", 1); + mcTree->SetBranchAddress("MCTrack", &mcArr); + mcTree->SetBranchAddress("MCEventHeader.", &mcEvent); + + int nev = mcTree->GetEntriesFast(); + info.resize(nev); + for (int n = 0; n < nev; n++) { + mcTree->GetEvent(n); + info[n].resize(mcArr->size()); + for (unsigned int mcI = 0; mcI < mcArr->size(); ++mcI) { + auto part = mcArr->at(mcI); + info[n][mcI].pvx = mcEvent->GetX(); + info[n][mcI].pvy = mcEvent->GetY(); + info[n][mcI].pvz = mcEvent->GetZ(); + info[n][mcI].event = n; + info[n][mcI].pdg = part.GetPdgCode(); + info[n][mcI].pt = part.GetPt(); + info[n][mcI].phi = part.GetPhi(); + info[n][mcI].eta = part.GetEta(); + info[n][mcI].isPrimary = part.isPrimary(); + } + } +} + +//______________________________________________________________________________ +// Main function CorrTracksClusters +void CorrTracksClusters(const std::string& tracfile = "o2trac_its.root", + const std::string& clusfile = "o2clus_its.root", + const std::string& kinefile = "o2sim_Kine.root", + const std::string& magfile = "o2sim_grp.root", + const 
std::string& hitfile = "o2sim_HitsIT3.root", + const std::string& dictfile = "IT3dictionary.root", + const std::string& inputGeom = "", + bool batch = false) +{ + gROOT->SetBatch(batch); + + // Geo and Field + LOGP(info, "Geo and Field loading"); + float bz{0.f}; + o2::its::GeometryTGeo* gman = nullptr; + loadGeometryAndField(magfile, inputGeom, bz, gman); + LOGP(info, "Finished Geo and Field loading"); + + // MC tracks + LOGP(info, "MC Track Info loading"); + std::vector* mcArr = nullptr; + o2::dataformats::MCEventHeader* mcEvent = nullptr; + TTree* mcTree = nullptr; + std::vector> info; + loadMCTrackInfo(kinefile, info, mcArr, mcEvent, mcTree); + LOGP(info, "Finished MC Track Info loading"); + + // Reconstructed tracks + LOGP(info, "Reco Tracks loading"); + TTree* recTree = nullptr; + std::vector* recArr = nullptr; + std::vector* trkLabArr = nullptr; + loadRecoTracks(tracfile, recTree, recArr, trkLabArr); + LOGP(info, "Finished Reco Tracks loading"); + + // Run analyzeRecoTracks + LOGP(info, "Track analysis (analyzeRecoTracks)"); + ULong_t unaccounted{0}, good{0}, fakes{0}, total{0}; + analyzeRecoTracks(recTree, recArr, trkLabArr, info, bz, unaccounted, good, fakes, total); + LOGP(info, "Finished track analysis (analyzeRecoTracks)"); + + // Topology dictionary + LOGP(info, "Topology Dictionary loading"); + o2::its3::TopologyDictionary dict; + loadTopologyDictionary(dictfile, dict); + LOGP(info, "Finished Topology Dictionary loading"); + + // Clusters + LOGP(info, "Cluster Data loading"); + TTree* clusTree = nullptr; + std::vector* clusArr = nullptr; + o2::dataformats::MCTruthContainer* clusLabArr = nullptr; + std::vector* patternsPtr = nullptr; + std::vector mc2rofVec; + std::vector rofRecVec; + loadCluster(clusfile, clusTree, clusArr, clusLabArr, mc2rofVec, patternsPtr, rofRecVec); + LOGP(info, "Finished Cluster Data loading"); + // clusTree->GetEntry(0); + + // Hits + LOGP(info, "Hits loading"); + TTree* hitTree = nullptr; + std::vector*> hitVecPool; + 
std::vector> mc2hitVec; + prepareHitAccess(hitfile, hitTree, hitVecPool, mc2hitVec); + LOGP(info, "Finished Hits loading"); + + // Build min and max MC events used by each ROF + LOGP(info, "Building MC event ranges"); + std::vector mcEvMin, mcEvMax; + mcEvMin.assign(rofRecVec.size(), hitTree->GetEntries()); + mcEvMax.assign(rofRecVec.size(), -1); + buildMcEvRangePerROF(mc2rofVec, rofRecVec.size(), mcEvMin, mcEvMax); + LOGP(info, "Initial MC event ranges built"); + unsigned int nROFRec = rofRecVec.size(); + + // Map hits for MC events + LOGP(info, "Map hits for MC events"); + mapHitsForMCEvents(hitTree, hitVecPool, mc2hitVec, mcEvMin, mcEvMax, nROFRec); + LOGP(info, "Mapped hits for MC events"); + + // Run cluster particle matching + auto pattIt = patternsPtr->cbegin(); + for (unsigned int iClus = 0; iClus < clusArr->size(); ++iClus) { + auto lab = (clusLabArr->getLabels(iClus))[0]; + const auto& c = (*clusArr)[iClus]; + // Ensure pattIt is advanced even if cluster is skipped + if (!lab.isValid() || lab.getSourceID() != 0 || !lab.isCorrect()) { + if (c.getPatternID() == CompCluster::InvalidPatternID) { + o2::itsmft::ClusterPattern::skipPattern(pattIt); + } + continue; + } + + int trackID{0}, evID{0}, srcID{0}; + bool fake{false}; + lab.get(trackID, evID, srcID, fake); + if (evID < 0 || static_cast(evID) >= info.size() || trackID < 0 || static_cast(trackID) >= info[evID].size()) { + if (c.getPatternID() == CompCluster::InvalidPatternID) { + o2::itsmft::ClusterPattern::skipPattern(pattIt); + } + continue; + } + UShort_t chipID = c.getSensorID(); + int layer = gman->getLayer(chipID); + bool isIB = layer < 3; + info[evID][trackID].clusters |= 1 << layer; + + o2::math_utils::Point3D clusterPos; + int clusterSize; + auto pattID = c.getPatternID(); + o2::itsmft::ClusterPattern patt; + if (!resolvePattern(c, pattIt, dict, isIB, patt)) { + continue; + } + clusterSize = patt.getNPixels(); + clusterPos = dict.getClusterCoordinates(c, patt, false); + + if (isIB) { + 
CurvedLocalToFlat(clusterPos, SegmentationIB(layer)); + } + + info[evID][trackID].clusterSize[layer] = clusterSize; + info[evID][trackID].clusterPattern[layer] = pattID; + info[evID][trackID].clusterLocX[layer] = clusterPos.X(); + info[evID][trackID].clusterLocZ[layer] = clusterPos.Z(); + + const auto& mc2hit = mc2hitVec[lab.getEventID()]; + const auto* hitArray = hitVecPool[lab.getEventID()]; + uint64_t key = (uint64_t(trackID) << 32) + c.getSensorID(); + auto hitIt = mc2hit.find(key); + if (hitIt == mc2hit.end()) + continue; + const auto& hit = (*hitArray)[hitIt->second]; + + auto hitLocSta = gman->getMatrixL2G(chipID) ^ hit.GetPosStart(); + auto hitLocEnd = gman->getMatrixL2G(chipID) ^ hit.GetPos(); + + if (isIB) { + CurvedLocalToFlat(hitLocSta, SegmentationIB(layer)); + CurvedLocalToFlat(hitLocEnd, SegmentationIB(layer)); + info[evID][trackID].hitLocX[layer] = 0.5f * (hitLocSta.X() + hitLocEnd.X()); + info[evID][trackID].hitLocY[layer] = 0.5f * (hitLocSta.Y() + hitLocEnd.Y()); + info[evID][trackID].hitLocZ[layer] = 0.5f * (hitLocSta.Z() + hitLocEnd.Z()); + } else { + auto x0 = hitLocSta.X(), dx = hitLocEnd.X() - x0; + auto y0 = hitLocSta.Y(), dy = hitLocEnd.Y() - y0; + auto z0 = hitLocSta.Z(), dz = hitLocEnd.Z() - z0; + auto r = (0.5f * (SegmentationOB::SensorLayerThickness - SegmentationOB::SensorLayerThicknessEff) - y0) / dy; + info[evID][trackID].hitLocX[layer] = x0 + r * dx; + info[evID][trackID].hitLocY[layer] = y0 + r * dy; + info[evID][trackID].hitLocZ[layer] = z0 + r * dz; + } + } + + LOGP(info, "Finished cluster-to-particle matching"); + + // The following part generates statistical histograms and outputs a TTree + int nb = 100; + double xbins[nb + 1], ptcutl = 0.01, ptcuth = 10.; + double a = std::log(ptcuth / ptcutl) / nb; + for (int i = 0; i <= nb; ++i) { + xbins[i] = ptcutl * std::exp(i * a); + } + auto* h_pt_num = new TH1D("h_pt_num", ";#it{p}_{T} (GeV/#it{c});Number of tracks", nb, xbins); + auto* h_pt_den = new TH1D("h_pt_den", ";#it{p}_{T} 
(GeV/#it{c});Number of generated primary particles", nb, xbins); + auto* h_pt_eff = new TEfficiency("h_pt_eff", "Tracking Efficiency;#it{p}_{T} (GeV/#it{c});Eff.", nb, xbins); + + auto* h_eta_num = new TH1D("h_eta_num", ";#it{#eta};Number of tracks", 60, -3, 3); + auto* h_eta_den = new TH1D("h_eta_den", ";#it{#eta};Number of generated particles", 60, -3, 3); + auto* h_eta_eff = new TEfficiency("h_eta_eff", "Tracking Efficiency;#it{#eta};Eff.", 60, -3, 3); + + auto* h_phi_num = new TH1D("h_phi_num", ";#varphi;Number of tracks", 360, 0., 2 * TMath::Pi()); + auto* h_phi_den = new TH1D("h_phi_den", ";#varphi;Number of generated particles", 360, 0., 2 * TMath::Pi()); + auto* h_phi_eff = new TEfficiency("h_phi_eff", "Tracking Efficiency;#varphi;Eff.", 360, 0., 2 * TMath::Pi()); + + auto* h_pt_fake = new TH1D("h_pt_fake", ";#it{p}_{T} (GeV/#it{c});Number of fake tracks", nb, xbins); + auto* h_pt_multifake = new TH1D("h_pt_multifake", ";#it{p}_{T} (GeV/#it{c});Number of multifake tracks", nb, xbins); + auto* h_pt_clones = new TH1D("h_pt_clones", ";#it{p}_{T} (GeV/#it{c});Number of cloned tracks", nb, xbins); + auto* h_dcaxy_vs_pt = new TH2D("h_dcaxy_vs_pt", ";#it{p}_{T} (GeV/#it{c});DCA_{xy} (#mum)", nb, xbins, 2000, -500., 500.); + auto* h_dcaxy_vs_eta = new TH2D("h_dcaxy_vs_eta", ";#it{#eta};DCA_{xy} (#mum)", 60, -3, 3, 2000, -500., 500.); + auto* h_dcaxy_vs_phi = new TH2D("h_dcaxy_vs_phi", ";#varphi;DCA_{xy} (#mum)", 360, 0., 2 * TMath::Pi(), 2000, -500., 500.); + auto* h_dcaz_vs_pt = new TH2D("h_dcaz_vs_pt", ";#it{p}_{T} (GeV/#it{c});DCA_{z} (#mum)", nb, xbins, 2000, -500., 500.); + auto* h_dcaz_vs_eta = new TH2D("h_dcaz_vs_eta", ";#it{#eta};DCA_{z} (#mum)", 60, -3, 3, 2000, -500., 500.); + auto* h_dcaz_vs_phi = new TH2D("h_dcaz_vs_phi", ";#varphi;DCA_{z} (#mum)", 360, 0., 2 * TMath::Pi(), 2000, -500., 500.); + auto* h_chi2 = new TH2D("h_chi2", ";#it{p}_{T} (GeV/#it{c});#chi^{2};Number of tracks", nb, xbins, 200, 0., 100.); + + for (auto& evInfo : info) { + for (auto& 
part : evInfo) { + if ((part.clusters & 0x7f) != 0x7f) { + // part.clusters != 0x3f && part.clusters != 0x3f << 1 && + // part.clusters != 0x1f && part.clusters != 0x1f << 1 && part.clusters + // != 0x1f << 2 && part.clusters != 0x0f && part.clusters != 0x0f << 1 + // && part.clusters != 0x0f << 2 && part.clusters != 0x0f << 3) { + continue; + } + if (!part.isPrimary) { + continue; + } + + h_pt_den->Fill(part.pt); + h_eta_den->Fill(part.eta); + h_phi_den->Fill(part.phi); + + if (part.isReco != 0u) { + h_pt_num->Fill(part.pt); + h_eta_num->Fill(part.eta); + h_phi_num->Fill(part.phi); + if (std::abs(part.eta) < 0.5) { + h_dcaxy_vs_pt->Fill(part.pt, part.dcaxy * 10000); + h_dcaz_vs_pt->Fill(part.pt, part.dcaz * 10000); + } + h_dcaz_vs_eta->Fill(part.eta, part.dcaz * 10000); + h_dcaxy_vs_eta->Fill(part.eta, part.dcaxy * 10000); + h_dcaxy_vs_phi->Fill(part.phi, part.dcaxy * 10000); + h_dcaz_vs_phi->Fill(part.phi, part.dcaz * 10000); + + h_chi2->Fill(part.pt, part.track.getChi2()); + + if (part.isReco > 1) { + for (int _i{0}; _i < part.isReco - 1; ++_i) { + h_pt_clones->Fill(part.pt); + } + } + } + if (part.isFake != 0u) { + h_pt_fake->Fill(part.pt); + if (part.isFake > 1) { + for (int _i{0}; _i < part.isFake - 1; ++_i) { + h_pt_multifake->Fill(part.pt); + } + } + } + } + } + + LOGP(info, "Streaming output TTree to file"); + TFile file("CorrTracksClusters.root", "recreate"); + TTree tree("ParticleInfo", "ParticleInfo"); + ParticleInfo pInfo; + tree.Branch("particle", &pInfo); + for (auto& event : info) { + for (auto& part : event) { + int nCl{0}; + for (unsigned int bit{0}; bit < sizeof(pInfo.clusters) * 8; ++bit) { + nCl += bool(part.clusters & (1 << bit)); + } + if (nCl < 3) { + continue; + } + pInfo = part; + tree.Fill(); + } + } + tree.Write(); + h_pt_num->Write(); + h_eta_num->Write(); + h_phi_num->Write(); + h_pt_den->Write(); + h_eta_den->Write(); + h_phi_den->Write(); + h_pt_multifake->Write(); + h_pt_fake->Write(); + h_dcaxy_vs_pt->Write(); + 
h_dcaz_vs_pt->Write(); + h_dcaxy_vs_eta->Write(); + h_dcaxy_vs_phi->Write(); + h_dcaz_vs_eta->Write(); + h_dcaz_vs_phi->Write(); + h_pt_clones->Write(); + h_chi2->Write(); + + h_pt_eff->SetTotalHistogram(*h_pt_den, ""); + h_pt_eff->SetPassedHistogram(*h_pt_num, ""); + h_pt_eff->SetTitle("Tracking Efficiency;#it{p}_{T} (GeV/#it{c});Eff."); + h_pt_eff->Write(); + + h_phi_eff->SetTotalHistogram(*h_phi_den, ""); + h_phi_eff->SetPassedHistogram(*h_phi_num, ""); + h_phi_eff->SetTitle("Tracking Efficiency;#varphi;Eff."); + h_phi_eff->Write(); + + h_eta_eff->SetTotalHistogram(*h_eta_den, ""); + h_eta_eff->SetPassedHistogram(*h_eta_num, ""); + h_eta_eff->SetTitle("Tracking Efficiency;#it{#eta};Eff."); + h_eta_eff->Write(); + + file.Close(); + LOGP(info, "Finished streaming output TTree to file"); + LOGP(info, "done."); +} diff --git a/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C b/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C index cc241afb3357a..76d7bf09de77f 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C @@ -52,7 +52,7 @@ #endif -void CreateDictionariesITS3(bool saveDeltas = false, +void CreateDictionariesITS3(bool saveDeltas = true, float probThreshold = 1e-6, std::string clusDictFile = "", std::string clusfile = "o2clus_its.root", @@ -94,7 +94,7 @@ void CreateDictionariesITS3(bool saveDeltas = false, TNtuple* nt = nullptr; if (saveDeltas) { fout = TFile::Open("CreateDictionaries.root", "recreate"); - nt = new TNtuple("nt", "hashes ntuple", "hash:dx:dz"); + nt = new TNtuple("nt", "hashes ntuple", "hash:layer:chipID:xhf:zhf:xcf:zcf:dx:dz:outlimDx:outlimDz"); } const o2::steer::DigitizationContext* digContext = nullptr; @@ -284,19 +284,25 @@ void CreateDictionariesITS3(bool saveDeltas = false, dZ = xyzLocM.Z() - locC.Z(); dX /= (ib) ? o2::its3::SegmentationMosaix::PitchRow : o2::itsmft::SegmentationAlpide::PitchRow; dZ /= (ib) ? 
o2::its3::SegmentationMosaix::PitchCol : o2::itsmft::SegmentationAlpide::PitchCol; - if (saveDeltas) { - nt->Fill(topology.getHash(), dX, dZ); - } + + float outLimitDx{-1}, outLimitDz{-1}; if (checkOutliers > 0.) { - if (bool bX = std::abs(dX) > topology.getRowSpan() * checkOutliers, bZ = std::abs(dZ) > topology.getColumnSpan() * checkOutliers; bX || bZ) { // ignore outlier + outLimitDx = topology.getRowSpan() * checkOutliers; + outLimitDz = topology.getColumnSpan() * checkOutliers; + bool isOutDx = std::abs(dX) > outLimitDx; + bool isOutDz = std::abs(dZ) > outLimitDz; + if (isOutDx || isOutDz) { // ignore outlier (ib) ? ++cOutliersIB : ++cOutliersOB; - LOGP(debug, "Ignored Value dX={} > {} * {} -> {}", dX, topology.getRowSpan(), checkOutliers, bX); - LOGP(debug, "Ignored Value dZ={} > {} * {} -> {}", dZ, topology.getColumnSpan(), checkOutliers, bZ); + LOGP(debug, "Ignored Value dX={} > {} * {} -> {}", dX, topology.getRowSpan(), checkOutliers, isOutDx); + LOGP(debug, "Ignored Value dZ={} > {} * {} -> {}", dZ, topology.getColumnSpan(), checkOutliers, isOutDz); dX = dZ = BuildTopologyDictionary::IgnoreVal; } else { (ib) ? 
++cOkIB : ++cOkOB; } } + if (saveDeltas) { + nt->Fill(topology.getHash(), layer, chipID, xyzLocM.X(), xyzLocM.Z(), locC.X(), locC.Z(), dX, dZ, outLimitDx, outLimitDz); + } } } else { /* LOGP(info, " Failed to find MC hit entry for Tr: {} chipID: {}", trID, chipID); */ diff --git a/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt b/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt index 2fad72a96426d..8c4722012224d 100644 --- a/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt @@ -15,8 +15,11 @@ o2_add_library(ITS3Simulation src/DescriptorInnerBarrelITS3.cxx src/Digitizer.cxx src/DigiParams.cxx + src/ITS3DPLDigitizerParam.cxx + src/ChipDigitsContainer.cxx + src/ChipSimResponse.cxx PUBLIC_LINK_LIBRARIES O2::SimulationDataFormat - O2::ITSBase O2::ITSMFTSimulation + O2::ITSBase O2::ITSMFTSimulation O2::ITSMFTBase ROOT::Physics) o2_target_root_dictionary(ITS3Simulation @@ -25,6 +28,9 @@ o2_target_root_dictionary(ITS3Simulation include/ITS3Simulation/DescriptorInnerBarrelITS3.h include/ITS3Simulation/Digitizer.h include/ITS3Simulation/DigiParams.h + include/ITS3Simulation/ITS3DPLDigitizerParam.h + include/ITS3Simulation/ChipDigitsContainer.h + include/ITS3Simulation/ChipSimResponse.h ) o2_data_file(COPY data DESTINATION Detectors/ITS3/simulation) diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipDigitsContainer.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipDigitsContainer.h new file mode 100644 index 0000000000000..0c9627fe412c3 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipDigitsContainer.h @@ -0,0 +1,59 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_ITS3_CHIPDIGITSCONTAINER_ +#define ALICEO2_ITS3_CHIPDIGITSCONTAINER_ + +#include "ITSMFTBase/SegmentationAlpide.h" // Base class in o2::itsmft namespace +#include "ITSMFTSimulation/ChipDigitsContainer.h" // Base class in o2::itsmft namespace +#include "ITS3Base/SegmentationMosaix.h" // OB segmentation implementation +#include "ITS3Base/SpecsV2.h" // Provides SpecsV2::isDetITS3() interface +#include "ITS3Simulation/DigiParams.h" // ITS3-specific DigiParams interface +#include + +namespace o2::its3 +{ + +class ChipDigitsContainer : public o2::itsmft::ChipDigitsContainer +{ + private: + bool innerBarrel; ///< true if the chip belongs to the inner barrel (IB), false if outer barrel (OB) + int maxRows; ///< maximum number of rows + int maxCols; ///< maximum number of columns + + public: + explicit ChipDigitsContainer(UShort_t idx = 0); + + using SegmentationIB = SegmentationMosaix; + using SegmentationOB = o2::itsmft::SegmentationAlpide; + + /// Returns whether the chip is in the inner barrel (IB) + void setChipIndex(UShort_t idx) + { + o2::itsmft::ChipDigitsContainer::setChipIndex(idx); + innerBarrel = constants::detID::isDetITS3(getChipIndex()); + maxRows = innerBarrel ? SegmentationIB::NRows : SegmentationOB::NRows; + maxCols = innerBarrel ? 
SegmentationIB::NCols : SegmentationOB::NCols; + } + + int getMaxRows() const { return maxRows; } + int getMaxCols() const { return maxCols; } + bool isIB() const; + /// Adds noise digits, deleted the one using the itsmft::DigiParams interface + void addNoise(UInt_t rofMin, UInt_t rofMax, const o2::itsmft::DigiParams* params, int maxRows = o2::itsmft::SegmentationAlpide::NRows, int maxCols = o2::itsmft::SegmentationAlpide::NCols) = delete; + void addNoise(UInt_t rofMin, UInt_t rofMax, const o2::its3::DigiParams* params); + + ClassDefNV(ChipDigitsContainer, 1); +}; + +} // namespace o2::its3 + +#endif // ALICEO2_ITS3_CHIPDIGITSCONTAINER_ \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipSimResponse.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipSimResponse.h new file mode 100644 index 0000000000000..f96fde9fb0d55 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipSimResponse.h @@ -0,0 +1,41 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef ALICEO2_ITS3SIMULATION_CHIPSIMRESPONSE_H +#define ALICEO2_ITS3SIMULATION_CHIPSIMRESPONSE_H + +#include "ITSMFTSimulation/AlpideSimResponse.h" + +namespace o2 +{ +namespace its3 +{ + +class ChipSimResponse : public o2::itsmft::AlpideSimResponse +{ + public: + ChipSimResponse() = default; + ChipSimResponse(const ChipSimResponse& other) = default; + + float getRespCentreDep() const { return mRespCentreDep; } + void computeCentreFromData(); + void initData(int tableNumber, std::string dataPath, const bool quiet = true); + + private: + float mRespCentreDep = 0.f; + + ClassDef(ChipSimResponse, 1); +}; + +} // namespace its3 +} // namespace o2 + +#endif // ALICEO2_ITS3SIMULATION_CHIPSIMRESPONSE_H \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h index eca0a71949ba7..5764dfbd7d593 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h @@ -13,21 +13,43 @@ #define ITS3_DIGIPARAMS_H #include "ITSMFTSimulation/DigiParams.h" +#include "ITS3Simulation/ChipSimResponse.h" namespace o2::its3 { class DigiParams final : public o2::itsmft::DigiParams { + private: + float mIBNoisePerPixel = 1.e-8; + int mIBChargeThreshold = 150; ///< charge threshold in Nelectrons + int mIBMinChargeToAccount = 15; ///< minimum charge contribution to account + int mIBNSimSteps = 18; ///< number of steps in response simulation + float mIBNSimStepsInv = 0; ///< its inverse + public: + DigiParams(); + + void setIBNoisePerPixel(float v) { mIBNoisePerPixel = v; } + float getIBNoisePerPixel() const { return mIBNoisePerPixel; } + + void setIBChargeThreshold(int v, float frac2Account = 0.1); + int getIBChargeThreshold() const { return mIBChargeThreshold; } + + void setIBNSimSteps(int v); + int getIBNSimSteps() const { return mIBNSimSteps; } + float 
getIBNSimStepsInv() const { return mIBNSimStepsInv; } + + int getIBMinChargeToAccount() const { return mIBMinChargeToAccount; } + const o2::itsmft::AlpideSimResponse* getAlpSimResponse() const = delete; void setAlpSimResponse(const o2::itsmft::AlpideSimResponse* par) = delete; const o2::itsmft::AlpideSimResponse* getOBSimResponse() const { return mOBSimResponse; } void setOBSimResponse(const o2::itsmft::AlpideSimResponse* response) { mOBSimResponse = response; } - const o2::itsmft::AlpideSimResponse* getIBSimResponse() const { return mIBSimResponse; } - void setIBSimResponse(const o2::itsmft::AlpideSimResponse* response) { mIBSimResponse = response; } + o2::its3::ChipSimResponse* getIBSimResponse() const { return mIBSimResponse; } + void setIBSimResponse(o2::its3::ChipSimResponse* response); bool hasResponseFunctions() const { return mIBSimResponse != nullptr && mOBSimResponse != nullptr; } @@ -35,7 +57,7 @@ class DigiParams final : public o2::itsmft::DigiParams private: const o2::itsmft::AlpideSimResponse* mOBSimResponse = nullptr; //!< pointer to external response - const o2::itsmft::AlpideSimResponse* mIBSimResponse = nullptr; //!< pointer to external response + o2::its3::ChipSimResponse* mIBSimResponse = nullptr; //!< pointer to external response ClassDef(DigiParams, 1); }; diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h index 8d0f06a27343b..edc5583c03d5a 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h @@ -21,12 +21,13 @@ #include "Rtypes.h" #include "TObject.h" -#include "ITSMFTSimulation/ChipDigitsContainer.h" #include "ITSMFTSimulation/AlpideSimResponse.h" #include "ITSMFTSimulation/Hit.h" #include "ITSBase/GeometryTGeo.h" #include "ITS3Base/SegmentationMosaix.h" #include "ITS3Simulation/DigiParams.h" +#include 
"ITS3Simulation/ChipDigitsContainer.h" +#include "ITS3Simulation/ChipSimResponse.h" #include "DataFormatsITSMFT/Digit.h" #include "DataFormatsITSMFT/ROFRecord.h" #include "CommonDataFormat/InteractionRecord.h" @@ -78,7 +79,7 @@ class Digitizer : public TObject private: void processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID, int srcID); - void registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, + void registerDigits(o2::its3::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, uint16_t row, uint16_t col, int nEle, o2::MCCompLabel& lbl); ExtraDig* getExtraDigBuffer(uint32_t roFrame) @@ -108,7 +109,7 @@ class Digitizer : public TObject static constexpr std::array mIBSegmentations{0, 1, 2}; - o2::itsmft::AlpideSimResponse* mSimRespIB = nullptr; // simulated response for IB + o2::its3::ChipSimResponse* mSimRespIB = nullptr; // simulated response for IB o2::itsmft::AlpideSimResponse* mSimRespOB = nullptr; // simulated response for OB bool mSimRespIBOrientation{false}; // wether the orientation in the IB response function is flipped float mSimRespIBShift{0.f}; // adjusting the Y-shift in the IB response function to match sensor local coord. @@ -118,7 +119,7 @@ class Digitizer : public TObject const o2::its::GeometryTGeo* mGeometry = nullptr; ///< ITS3 geometry - std::vector mChips; ///< Array of chips digits containers + std::vector mChips; ///< Array of chips digits containers std::deque> mExtraBuff; ///< burrer (per roFrame) for extra digits std::vector* mDigits = nullptr; //! 
output digits diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3DPLDigitizerParam.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3DPLDigitizerParam.h new file mode 100644 index 0000000000000..3192f73fb8f79 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3DPLDigitizerParam.h @@ -0,0 +1,32 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_ITS3DPLDIGITIZERPARAM_H_ +#define ALICEO2_ITS3DPLDIGITIZERPARAM_H_ + +#include "CommonUtils/ConfigurableParam.h" +#include "CommonUtils/ConfigurableParamHelper.h" + +namespace o2::its3 +{ + +struct ITS3DPLDigitizerParam : public o2::conf::ConfigurableParamHelper { + float IBNoisePerPixel = 1.e-8; ///< MOSAIX Noise per channel + int IBChargeThreshold = 150; ///< charge threshold in Nelectrons for IB + int IBMinChargeToAccount = 15; ///< minimum charge contribution to account for IB + int nIBSimSteps = 18; ///< number of steps in response for IB + + O2ParamDef(ITS3DPLDigitizerParam, "ITS3DPLDigitizerParam"); +}; + +} // namespace o2::its3 + +#endif \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/src/ChipDigitsContainer.cxx b/Detectors/Upgrades/ITS3/simulation/src/ChipDigitsContainer.cxx new file mode 100644 index 0000000000000..0611f7002f160 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/src/ChipDigitsContainer.cxx @@ -0,0 +1,63 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "ITS3Simulation/ChipDigitsContainer.h" + +namespace o2 +{ +namespace its3 +{ + +ChipDigitsContainer::ChipDigitsContainer(UShort_t idx) + : o2::itsmft::ChipDigitsContainer(idx) {} + +bool ChipDigitsContainer::isIB() const +{ + return innerBarrel; +} + +void ChipDigitsContainer::addNoise(UInt_t rofMin, UInt_t rofMax, const o2::its3::DigiParams* params) +{ + UInt_t row = 0; + UInt_t col = 0; + Int_t nhits = 0; + constexpr float ns2sec = 1e-9; + float mean = 0.f; + int nel = 0; + + if (isIB()) { + // Inner barrel: use ITS3-specific noise interface with OB segmentation. + mean = params->getIBNoisePerPixel() * SegmentationOB::NPixels; + nel = static_cast(params->getIBChargeThreshold() * 1.1); + } else { + // Outer barrel: use base class noise interface with IB segmentation. 
+ mean = params->getNoisePerPixel() * SegmentationIB::NPixels; + nel = static_cast(params->getChargeThreshold() * 1.1); + } + + for (UInt_t rof = rofMin; rof <= rofMax; ++rof) { + nhits = gRandom->Poisson(mean); + for (Int_t i = 0; i < nhits; ++i) { + row = gRandom->Integer(maxRows); + col = gRandom->Integer(maxCols); + if (mNoiseMap && mNoiseMap->isNoisy(mChipIndex, row, col)) + continue; + if (mDeadChanMap && mDeadChanMap->isNoisy(mChipIndex, row, col)) + continue; + auto key = getOrderingKey(rof, row, col); + if (!findDigit(key)) + addDigit(key, rof, row, col, nel, o2::MCCompLabel(true)); + } + } +} + +} // namespace its3 +} // namespace o2 \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/src/ChipSimResponse.cxx b/Detectors/Upgrades/ITS3/simulation/src/ChipSimResponse.cxx new file mode 100644 index 0000000000000..1c482983f0d0a --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/src/ChipSimResponse.cxx @@ -0,0 +1,62 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "ITS3Simulation/ChipSimResponse.h" +#include +#include + +using namespace o2::its3; + +ClassImp(o2::its3::ChipSimResponse); + +void ChipSimResponse::initData(int tableNumber, std::string dataPath, const bool quiet) +{ + AlpideSimResponse::initData(tableNumber, dataPath, quiet); + computeCentreFromData(); +} + +void ChipSimResponse::computeCentreFromData() +{ + std::vector zVec, qVec; + const int npix = o2::itsmft::AlpideRespSimMat::getNPix(); + + for (int iz = 0; iz < mNBinDpt; ++iz) { + size_t bin = iz + mNBinDpt * (0 + mNBinRow * 0); + const auto& mat = mData[bin]; + float val = mat.getValue(npix / 2, npix / 2); + float gz = mDptMin + iz / mStepInvDpt; + zVec.push_back(gz); + qVec.push_back(val); + } + + std::vector> zqPairs; + for (size_t i = 0; i < zVec.size(); ++i) { + zqPairs.emplace_back(zVec[i], qVec[i]); + } + std::sort(zqPairs.begin(), zqPairs.end()); + zVec.clear(); + qVec.clear(); + for (auto& p : zqPairs) { + zVec.push_back(p.first); + qVec.push_back(p.second); + } + + float intQ = 0.f, intZQ = 0.f; + for (size_t i = 0; i + 1 < zVec.size(); ++i) { + float z0 = zVec[i], z1 = zVec[i + 1]; + float q0 = qVec[i], q1 = qVec[i + 1]; + float dz = z1 - z0; + intQ += 0.5f * (q0 + q1) * dz; + intZQ += 0.5f * (z0 * q0 + z1 * q1) * dz; + } + + mRespCentreDep = (intQ > 0.f) ? intZQ / intQ : 0.f; +} diff --git a/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx b/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx index a9f17a544b3c4..afa02ec44741d 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx @@ -14,27 +14,67 @@ #include "Framework/Logger.h" #include "ITS3Simulation/DigiParams.h" +#include ClassImp(o2::its3::DigiParams); namespace o2::its3 { +DigiParams::DigiParams() +{ + // make sure the defaults are consistent + setIBNSimSteps(mIBNSimSteps); +} + +void DigiParams::setIBNSimSteps(int v) +{ + // set number of sampling steps in silicon + mIBNSimSteps = v > 0 ? 
v : 1; + mIBNSimStepsInv = 1.f / mIBNSimSteps; +} + +void DigiParams::setIBChargeThreshold(int v, float frac2Account) +{ + // set charge threshold for digits creation and its fraction to account + // contribution from single hit + mIBChargeThreshold = v; + mIBMinChargeToAccount = v * frac2Account; + if (mIBMinChargeToAccount < 0 || mIBMinChargeToAccount > mIBChargeThreshold) { + mIBMinChargeToAccount = mIBChargeThreshold; + } + LOG(info) << "Set Mosaix charge threshold to " << mIBChargeThreshold + << ", single hit will be accounted from " << mIBMinChargeToAccount + << " electrons"; +} + void DigiParams::print() const { // print settings - LOGF(info, "ITS3 DigiParams settings:"); - LOGF(info, "Continuous readout : %s", isContinuous() ? "ON" : "OFF"); - LOGF(info, "Readout Frame Length(ns) : %f", getROFrameLength()); - LOGF(info, "Strobe delay (ns) : %f", getStrobeDelay()); - LOGF(info, "Strobe length (ns) : %f", getStrobeLength()); - LOGF(info, "Threshold (N electrons) : %d", getChargeThreshold()); - LOGF(info, "Min N electrons to account : %d", getMinChargeToAccount()); - LOGF(info, "Number of charge sharing steps : %d", getNSimSteps()); - LOGF(info, "ELoss to N electrons factor : %e", getEnergyToNElectrons()); - LOGF(info, "Noise level per pixel : %e", getNoisePerPixel()); - LOGF(info, "Charge time-response:\n"); + printf("ITS3 DigiParams settings:\n"); + printf("Continuous readout : %s\n", isContinuous() ? 
"ON" : "OFF"); + printf("Readout Frame Length(ns) : %f\n", getROFrameLength()); + printf("Strobe delay (ns) : %f\n", getStrobeDelay()); + printf("Strobe length (ns) : %f\n", getStrobeLength()); + printf("IB Threshold (N electrons) : %d\n", getIBChargeThreshold()); + printf("OB Threshold (N electrons) : %d\n", getChargeThreshold()); + printf("Min N electrons to account for IB : %d\n", getIBMinChargeToAccount()); + printf("Min N electrons to account for OB : %d\n", getMinChargeToAccount()); + printf("Number of charge sharing steps of IB : %d\n", getIBNSimSteps()); + printf("Number of charge sharing steps of OB : %d\n", getNSimSteps()); + printf("ELoss to N electrons factor : %e\n", getEnergyToNElectrons()); + printf("Noise level per pixel of IB : %e\n", getIBNoisePerPixel()); + printf("Noise level per pixel of OB : %e\n", getNoisePerPixel()); + printf("Charge time-response:\n"); getSignalShape().print(); } +void DigiParams::setIBSimResponse(o2::its3::ChipSimResponse* response) +{ + mIBSimResponse = response; + if (mIBSimResponse) { + mIBSimResponse->computeCentreFromData(); + } +} + } // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx b/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx index 3c75bf3e8f680..1d1d15a91f89b 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx @@ -27,7 +27,8 @@ #include using o2::itsmft::Hit; -using SegmentationAlpide = o2::itsmft::SegmentationAlpide; +using SegmentationOB = o2::itsmft::SegmentationAlpide; +using SegmentationIB = o2::its3::SegmentationMosaix; using o2::itsmft::AlpideRespSimMat; using o2::itsmft::PreDigit; @@ -46,8 +47,8 @@ void Digitizer::init() } if (!mParams.hasResponseFunctions()) { - auto loadSetResponseFunc = [&](const char* name, const char* fileIB, const char* nameIB, const char* fileOB, const char* nameOB) { - LOGP(info, "Loading response function for {}: IB={}:{} ; OB={}:{}", name, nameIB, fileIB, 
nameOB, fileOB); + auto loadSetResponseFunc = [&](const char* fileIB, const char* nameIB, const char* fileOB, const char* nameOB) { + LOGP(info, "Loading response function IB={}:{} ; OB={}:{}", nameIB, fileIB, nameOB, fileOB); auto fIB = TFile::Open(fileIB, "READ"); if (!fIB || fIB->IsZombie() || !fIB->IsOpen()) { LOGP(fatal, "Cannot open file {}", fileIB); @@ -56,7 +57,7 @@ void Digitizer::init() if (!fOB || fOB->IsZombie() || !fOB->IsOpen()) { LOGP(fatal, "Cannot open file {}", fileOB); } - mParams.setIBSimResponse(mSimRespIB = fIB->Get(nameIB)); + mParams.setIBSimResponse(mSimRespIB = fIB->Get(nameIB)); mParams.setOBSimResponse(mSimRespOB = fOB->Get(nameOB)); fIB->Close(); fOB->Close(); @@ -64,25 +65,27 @@ void Digitizer::init() if (const auto& func = ITS3Params::Instance().chipResponseFunction; func == "Alpide") { constexpr const char* responseFile = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; - loadSetResponseFunc("Alpide", responseFile, "response0", responseFile, "response1"); - mSimRespIBShift = mSimRespIB->getDepthMax() - SegmentationMosaix::SensorLayerThickness / 2.f + 10.e-4f; - mSimRespOBShift = mSimRespOB->getDepthMax() - SegmentationAlpide::SensorLayerThickness / 2.f; + loadSetResponseFunc(responseFile, "response0", responseFile, "response0"); + mSimRespIBScaleX = o2::itsmft::SegmentationAlpide::PitchRow / SegmentationIB::PitchRow; + mSimRespIBScaleZ = o2::itsmft::SegmentationAlpide::PitchCol / SegmentationIB::PitchCol; } else if (func == "APTS") { constexpr const char* responseFileIB = "$(O2_ROOT)/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/APTSResponseData.root"; constexpr const char* responseFileOB = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; - loadSetResponseFunc("APTS", responseFileIB, "response1", responseFileOB, "response1"); - mSimRespIBShift = mSimRespIB->getDepthMax() + (float)constants::pixelarray::pixels::apts::responseYShift; - mSimRespOBShift = 
mSimRespOB->getDepthMax() - SegmentationAlpide::SensorLayerThickness / 2.f; - mSimRespIBScaleX = 0.5f * constants::pixelarray::pixels::apts::pitchX / SegmentationMosaix::PitchRow; - mSimRespIBScaleZ = 0.5f * constants::pixelarray::pixels::apts::pitchZ / SegmentationMosaix::PitchCol; + loadSetResponseFunc(responseFileIB, "response1", responseFileOB, "response0"); + mSimRespIBScaleX = constants::pixelarray::pixels::apts::pitchX / SegmentationIB::PitchRow; + mSimRespIBScaleZ = constants::pixelarray::pixels::apts::pitchZ / SegmentationIB::PitchCol; mSimRespIBOrientation = true; } else { LOGP(fatal, "ResponseFunction '{}' not implemented!", func); } + mSimRespIBShift = mSimRespIB->getDepthMax() - constants::silicon::thickness / 2.f; + mSimRespOBShift = mSimRespOB->getDepthMax() - SegmentationOB::SensorLayerThickness / 2.f; } + mParams.print(); - LOGP(info, "IBShift = {} ; OBShift = {}", mSimRespIBShift, mSimRespOBShift); - LOGP(info, "IB-Scale: X={} ; Z={}", mSimRespIBScaleX, mSimRespIBScaleZ); + LOGP(info, "IB shift = {} ; OB shift = {}", mSimRespIBShift, mSimRespOBShift); + LOGP(info, "IB pixel scale on x = {} ; z = {}", mSimRespIBScaleX, mSimRespIBScaleZ); + LOGP(info, "IB response orientation: {}", mSimRespIBOrientation ? 
"flipped" : "normal"); mIRFirstSampledTF = o2::raw::HBFUtils::Instance().getFirstSampledTFIR(); } @@ -173,11 +176,7 @@ void Digitizer::fillOutputContainer(uint32_t frameLast) auto& extra = *(mExtraBuff.front().get()); for (size_t iChip{0}; iChip < mChips.size(); ++iChip) { auto& chip = mChips[iChip]; - if (constants::detID::isDetITS3(iChip)) { // Check if this is a chip of ITS3 - chip.addNoise(mROFrameMin, mROFrameMin, &mParams, SegmentationMosaix::NRows, SegmentationMosaix::NCols); - } else { - chip.addNoise(mROFrameMin, mROFrameMin, &mParams); - } + chip.addNoise(mROFrameMin, mROFrameMin, &mParams); auto& buffer = chip.getPreDigits(); if (buffer.empty()) { continue; @@ -190,7 +189,7 @@ void Digitizer::fillOutputContainer(uint32_t frameLast) break; // is the digit ROFrame from the key > the max requested frame } auto& preDig = iter->second; // preDigit - if (preDig.charge >= mParams.getChargeThreshold()) { + if (preDig.charge >= (chip.isIB() ? mParams.getIBChargeThreshold() : mParams.getChargeThreshold())) { int digID = mDigits->size(); mDigits->emplace_back(chip.getChipIndex(), preDig.row, preDig.col, preDig.charge); mMCLabels->addElement(digID, preDig.labelRef.label); @@ -257,16 +256,15 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } // here we start stepping in the depth of the sensor to generate charge diffision - float nStepsInv = mParams.getNSimStepsInv(); - int nSteps = mParams.getNSimSteps(); int detID{hit.GetDetectorID()}; int layer = mGeometry->getLayer(detID); const auto& matrix = mGeometry->getMatrixL2G(detID); - bool innerBarrel{layer < 3}; + int nSteps = chip.isIB() ? mParams.getIBNSimSteps() : mParams.getNSimSteps(); + float nStepsInv = chip.isIB() ? 
mParams.getIBNSimStepsInv() : mParams.getNSimStepsInv(); math_utils::Vector3D xyzLocS, xyzLocE; xyzLocS = matrix ^ (hit.GetPosStart()); // Global hit coordinates to local detector coordinates xyzLocE = matrix ^ (hit.GetPos()); - if (innerBarrel) { + if (chip.isIB()) { // transform the point on the curved surface to a flat one float xFlatE{0.f}, yFlatE{0.f}, xFlatS{0.f}, yFlatS{0.f}; mIBSegmentations[layer].curvedToFlat(xyzLocS.X(), xyzLocS.Y(), xFlatS, yFlatS); @@ -284,7 +282,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID xyzLocS += stepH; // Adjust start position to the middle of the first step xyzLocE -= stepH; // Adjust end position to the middle of the last step int rowS = -1, colS = -1, rowE = -1, colE = -1, nSkip = 0; - if (innerBarrel) { + if (chip.isIB()) { // get entrance pixel row and col while (!mIBSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? if (++nSkip >= nSteps) { @@ -301,14 +299,14 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } } else { // get entrance pixel row and col - while (!SegmentationAlpide::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? + while (!SegmentationOB::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } xyzLocS += step; } // get exit pixel row and col - while (!SegmentationAlpide::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? + while (!SegmentationOB::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } @@ -327,8 +325,8 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID rowE += AlpideRespSimMat::NPix / 2; rowS = std::max(rowS, 0); - const int maxNrows{innerBarrel ? 
SegmentationMosaix::NRows : SegmentationAlpide::NRows}; - const int maxNcols{innerBarrel ? SegmentationMosaix::NCols : SegmentationAlpide::NCols}; + const int maxNrows{chip.isIB() ? SegmentationIB::NRows : SegmentationOB::NRows}; + const int maxNcols{chip.isIB() ? SegmentationIB::NCols : SegmentationOB::NCols}; rowE = std::min(rowE, maxNrows - 1); colS -= AlpideRespSimMat::NPix / 2; @@ -352,22 +350,22 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID // take into account that the AlpideSimResponse depth defintion has different min/max boundaries // although the max should coincide with the surface of the epitaxial layer, which in the chip // local coordinates has Y = +SensorLayerThickness/2 - xyzLocS.SetY(xyzLocS.Y() + ((innerBarrel) ? mSimRespIBShift : mSimRespOBShift)); + xyzLocS.SetY(xyzLocS.Y() + ((chip.isIB()) ? mSimRespIBShift : mSimRespOBShift)); // collect charge in evey pixel which might be affected by the hit for (int iStep = nSteps; iStep--;) { // Get the pixel ID - if (innerBarrel) { + if (chip.isIB()) { mIBSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); } else { - SegmentationAlpide::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); + SegmentationOB::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); } if (row != rowPrev || col != colPrev) { // update pixel and coordinates of its center - if (innerBarrel) { + if (chip.isIB()) { if (!mIBSegmentations[layer].detectorToLocal(row, col, cRowPix, cColPix)) { continue; } - } else if (!SegmentationAlpide::detectorToLocal(row, col, cRowPix, cColPix)) { + } else if (!SegmentationOB::detectorToLocal(row, col, cRowPix, cColPix)) { continue; // should not happen } rowPrev = row; @@ -377,13 +375,13 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID // note that response needs coordinates along column row (locX) (locZ) then depth (locY) float rowMax{}, colMax{}; const AlpideRespSimMat* rspmat{nullptr}; - if (innerBarrel) 
{ - rowMax = 0.5f * SegmentationMosaix::PitchRow; - colMax = 0.5f * SegmentationMosaix::PitchCol; + if (chip.isIB()) { + rowMax = 0.5f * SegmentationIB::PitchRow * mSimRespIBScaleX; + colMax = 0.5f * SegmentationIB::PitchCol * mSimRespIBScaleZ; rspmat = mSimRespIB->getResponse(mSimRespIBScaleX * (xyzLocS.X() - cRowPix), mSimRespIBScaleZ * (xyzLocS.Z() - cColPix), xyzLocS.Y(), flipRow, flipCol, rowMax, colMax); } else { - rowMax = 0.5f * SegmentationAlpide::PitchRow; - colMax = 0.5f * SegmentationAlpide::PitchCol; + rowMax = 0.5f * SegmentationOB::PitchRow; + colMax = 0.5f * SegmentationOB::PitchCol; rspmat = mSimRespOB->getResponse(xyzLocS.X() - cRowPix, xyzLocS.Z() - cColPix, xyzLocS.Y(), flipRow, flipCol, rowMax, colMax); } @@ -402,7 +400,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID if (colDest < 0 || colDest >= colSpan) { continue; } - respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, ((innerBarrel && mSimRespIBOrientation) ? !flipRow : flipRow), flipCol); + respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, ((chip.isIB() && mSimRespIBOrientation) ? !flipRow : flipRow), flipCol); } } } @@ -419,7 +417,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } int nEle = gRandom->Poisson(nElectrons * nEleResp); // total charge in given pixel // ignore charge which have no chance to fire the pixel - if (nEle < mParams.getMinChargeToAccount()) { + if (nEle < (chip.isIB() ? 
mParams.getIBChargeThreshold() : mParams.getChargeThreshold())) { continue; } uint16_t colIS = icol + colS; @@ -428,7 +426,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } } -void Digitizer::registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, +void Digitizer::registerDigits(o2::its3::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, uint16_t row, uint16_t col, int nEle, o2::MCCompLabel& lbl) { // Register digits for given pixel, accounting for the possible signal contribution to @@ -442,7 +440,7 @@ void Digitizer::registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t r tStrobe += mParams.getROFrameLength(); // for the next ROF // discard too small contributions, they have no chance to produce a digit - if (nEleROF < mParams.getMinChargeToAccount()) { + if (nEleROF < (chip.isIB() ? mParams.getIBChargeThreshold() : mParams.getChargeThreshold())) { continue; } if (roFr > mEventROFrameMax) { diff --git a/Detectors/Upgrades/ITS3/simulation/src/ITS3DPLDigitizerParam.cxx b/Detectors/Upgrades/ITS3/simulation/src/ITS3DPLDigitizerParam.cxx new file mode 100644 index 0000000000000..69314b8a0be9b --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/src/ITS3DPLDigitizerParam.cxx @@ -0,0 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "ITS3Simulation/ITS3DPLDigitizerParam.h" + +O2ParamImpl(o2::its3::ITS3DPLDigitizerParam) \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h b/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h index fca3f5d63c2c4..921512193f98b 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h +++ b/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h @@ -20,5 +20,8 @@ #pragma link C++ class o2::its3::DescriptorInnerBarrelITS3 + ; #pragma link C++ class o2::its3::DigiParams + ; #pragma link C++ class o2::its3::Digitizer + ; +#pragma link C++ class o2::its3::ITS3DPLDigitizerParam + ; +#pragma link C++ class o2::its3::ChipDigitsContainer + ; +#pragma link C++ class o2::its3::ChipSimResponse + ; #endif diff --git a/Steer/DigitizerWorkflow/src/ITS3DigitizerSpec.cxx b/Steer/DigitizerWorkflow/src/ITS3DigitizerSpec.cxx index 27f876f7bc24b..af0af091d40e8 100644 --- a/Steer/DigitizerWorkflow/src/ITS3DigitizerSpec.cxx +++ b/Steer/DigitizerWorkflow/src/ITS3DigitizerSpec.cxx @@ -27,6 +27,7 @@ #include "DataFormatsITSMFT/ROFRecord.h" #include "ITS3Simulation/Digitizer.h" #include "ITSMFTSimulation/DPLDigitizerParam.h" +#include "ITS3Simulation/ITS3DPLDigitizerParam.h" #include "ITSMFTBase/DPLAlpideParam.h" #include "ITSBase/GeometryTGeo.h" #include "ITS3Base/ITS3Params.h" @@ -216,6 +217,7 @@ class ITS3DPLDigitizerTask : BaseDPLDigitizer mDigitizer.setGeometry(geom); const auto& dopt = o2::itsmft::DPLDigitizerParam::Instance(); + const auto& doptIB = o2::its3::ITS3DPLDigitizerParam::Instance(); pc.inputs().get*>("ITS_alppar"); const auto& aopt = o2::itsmft::DPLAlpideParam::Instance(); digipar.setContinuous(dopt.continuous); @@ -238,6 +240,11 @@ class ITS3DPLDigitizerTask : BaseDPLDigitizer digipar.setTimeOffset(dopt.timeOffset); digipar.setNSimSteps(dopt.nSimSteps); + // ITS3 inner barrel specific parameters + digipar.setIBChargeThreshold(doptIB.IBChargeThreshold); + 
digipar.setIBNSimSteps(doptIB.nIBSimSteps); + digipar.setIBNoisePerPixel(doptIB.IBNoisePerPixel); + mROMode = digipar.isContinuous() ? o2::parameters::GRPObject::CONTINUOUS : o2::parameters::GRPObject::PRESENT; LOG(info) << mID.getName() << " simulated in " << ((mROMode == o2::parameters::GRPObject::CONTINUOUS) ? "CONTINUOUS" : "TRIGGERED") From e4399252ad5567ec680ed552fe5330ac9a51ce24 Mon Sep 17 00:00:00 2001 From: Hadi Hassan Date: Wed, 7 May 2025 13:09:49 +0300 Subject: [PATCH 0320/1764] Increasing the FOCAL volume based on the opening (#14241) --- Detectors/FOCAL/simulation/src/Detector.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/FOCAL/simulation/src/Detector.cxx b/Detectors/FOCAL/simulation/src/Detector.cxx index dc71c1066afdf..164b531e60918 100644 --- a/Detectors/FOCAL/simulation/src/Detector.cxx +++ b/Detectors/FOCAL/simulation/src/Detector.cxx @@ -451,7 +451,7 @@ void Detector::ConstructGeometry() } float pars[4]; - pars[0] = (mGeometry->getFOCALSizeX() + 2 * mGeometry->getMiddleTowerOffset()) / 2; + pars[0] = (mGeometry->getFOCALSizeX() + 2 * mGeometry->getMiddleTowerOffset() + mGeometry->getDetectorOpeningRight() + mGeometry->getDetectorOpeningLeft()) / 2; pars[1] = mGeometry->getFOCALSizeY() / 2; pars[2] = mGeometry->getFOCALSizeZ() / 2; // Add space to place 2 SiPad layers in front of ECAL @@ -984,7 +984,7 @@ void Detector::CreateECALGeometry() // Place the towers in the ECAL // --- Place the ECAL in FOCAL float fcal_pars[4]; - fcal_pars[0] = (geom->getFOCALSizeX() + 2. * geom->getMiddleTowerOffset()) / 2.; + fcal_pars[0] = (geom->getFOCALSizeX() + 2. 
* geom->getMiddleTowerOffset() + mGeometry->getDetectorOpeningRight() + mGeometry->getDetectorOpeningLeft()) / 2.; fcal_pars[1] = geom->getFOCALSizeY() / 2.; fcal_pars[2] = geom->getECALSizeZ() / 2.; fcal_pars[3] = 0.; From a28ef94e0010e819d44d01b6dcdec64db75c6ff9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 7 May 2025 13:33:27 +0200 Subject: [PATCH 0321/1764] GPU TPC: Do not shift track in Z after the last fit --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 5bd8fd556aa3f..2524c01f0c00b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -407,7 +407,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ break; // bad chi2 for the whole track, stop the fit } } - if (((nWays - iWay) & 1) && (clusters[0].sector < 18) == (clusters[maxN - 1].sector < 18)) { + if (((nWays - iWay) & 1) && (iWay != nWays - 1) && (clusters[0].sector < 18) == (clusters[maxN - 1].sector < 18)) { ShiftZ2(clusters, clustersXYZ, merger, maxN); } } From 8789c46104bc200f83a423b5840a783441e872fa Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 7 May 2025 16:02:04 +0200 Subject: [PATCH 0322/1764] GPU: Fix Debug Dump of deterministic mode without dEdx (was segfaulting in sync mode) --- GPU/GPUTracking/Base/GPUParam.cxx | 6 +++--- GPU/GPUTracking/Base/GPUParam.h | 2 +- GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 6 +++--- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 6 +++--- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index b835e1b198eea..57f23792e472a 100644 --- 
a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -135,9 +135,9 @@ void GPUParam::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessi UpdateRun3ClusterErrors(p->param.tpcErrorParamY, p->param.tpcErrorParamZ); } if (w) { - par.dodEdx = dodEdxDownscaled = w->steps.isSet(GPUDataTypes::RecoStep::TPCdEdx); - if (par.dodEdx && p && p->tpcDownscaledEdx != 0) { - dodEdxDownscaled = (rand() % 100) < p->tpcDownscaledEdx; + par.dodEdx = dodEdxEnabled = w->steps.isSet(GPUDataTypes::RecoStep::TPCdEdx); + if (dodEdxEnabled && p && p->tpcDownscaledEdx != 0) { + dodEdxEnabled = (rand() % 100) < p->tpcDownscaledEdx; } } if (d) { diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 4b77628c88775..076d25d72d178 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -54,7 +54,7 @@ struct GPUParam_t { float bzCLight; float qptB5Scaler; - int8_t dodEdxDownscaled; + int8_t dodEdxEnabled; int32_t continuousMaxTimeBin; int32_t tpcCutTimeBin; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 84835a1695071..a38148ccb375a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -256,7 +256,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; - if (param().dodEdxDownscaled) { + if (param().dodEdxEnabled) { GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); 
diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index b6241ad36b5de..6e7de7ee48ca6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -299,7 +299,7 @@ void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) void* GPUTPCGMMerger::SetPointersOutput(void* mem) { computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); - if (mRec->GetParam().dodEdxDownscaled) { + if (mRec->GetParam().dodEdxEnabled) { computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 2f8fbecadce5f..ac55f423b1c42 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -203,7 +203,7 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const const auto& p = trk.GetParam(); const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() - << " dEdx " << (trk.OK() ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() ? mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) + << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? 
mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << "\n"; } out << std::setprecision(ss); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index ea219a02a1887..9dc6ddc59c2b4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -44,7 +44,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlock constexpr uint8_t flagsReject = getFlagsReject(); const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); - bool cutOnTrackdEdx = merger.Param().par.dodEdx && merger.Param().dodEdxDownscaled && merger.Param().rec.tpc.minTrackdEdxMax2Tot > 0.f; + bool cutOnTrackdEdx = merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.minTrackdEdxMax2Tot > 0.f; GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); uint2* GPUrestrict() tmpData = merger.ClusRefTmp(); @@ -130,7 +130,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if (merger.Param().par.dodEdx && merger.Param().dodEdxDownscaled) { + if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled) { oTrack.setdEdx(tracksdEdx[i]); oTrack.setdEdxAlt(tracksdEdxAlt[i]); } @@ -148,7 +148,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks outerPar.C[6], outerPar.C[7], outerPar.C[8], outerPar.C[9], outerPar.C[10], outerPar.C[11], outerPar.C[12], outerPar.C[13], outerPar.C[14]})); - if (merger.Param().par.dodEdx && merger.Param().dodEdxDownscaled && merger.Param().rec.tpc.enablePID) { + if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.enablePID) { PIDResponse pidResponse{}; auto pid = pidResponse.getMostProbablePID(oTrack, merger.Param().rec.tpc.PID_EKrangeMin, merger.Param().rec.tpc.PID_EKrangeMax, 
merger.Param().rec.tpc.PID_EPrangeMin, merger.Param().rec.tpc.PID_EPrangeMax, merger.Param().rec.tpc.PID_EDrangeMin, merger.Param().rec.tpc.PID_EDrangeMax, merger.Param().rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); auto pidRemap = merger.Param().rec.tpc.PID_remap[pid]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 2524c01f0c00b..f5bfbe985fb8c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -216,7 +216,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - bool dodEdx = param.par.dodEdx && param.dodEdxDownscaled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; + bool dodEdx = param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); @@ -367,7 +367,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, prop.GetAlpha()); } - if (param.par.dodEdx && param.dodEdxDownscaled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters + if (param.par.dodEdx && param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option 
to remove double-clusters bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; if (acc || accAlt) { float qtot = 0, qmax = 0, pad = 0, relTime = 0; @@ -426,7 +426,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // TODO: we have looping tracks here with 0 accepted clusters in the primary leg. In that case we should refit the track using only the primary leg. - if (param.par.dodEdx && param.dodEdxDownscaled) { + if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); } From 42e1f6ac595fe09ada3cf36ae4baddd2b03aba44 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 7 May 2025 20:00:01 +0200 Subject: [PATCH 0323/1764] GPU: debug files of multiple GPU Reconstruciton instances shall not overwrite each other --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 1 + GPU/GPUTracking/Base/GPUReconstruction.h | 2 ++ GPU/GPUTracking/Global/GPUChainTracking.cxx | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index ad2ee2e840d00..3ef995b9f9561 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -87,6 +87,7 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos throw std::invalid_argument("Cannot be slave to a slave"); } mMaster = cfg.master; + mSlaveId = cfg.master->mSlaves.size(); cfg.master->mSlaves.emplace_back(this); } param().SetDefaults(mGRPSettings.get()); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index f5b39cb370b9e..d5c0b8e828087 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -190,6 +190,7 @@ class GPUReconstruction // 
Helpers to fetch processors from other shared libraries virtual void GetITSTraits(std::unique_ptr* trackerTraits, std::unique_ptr* vertexerTraits, std::unique_ptr* timeFrame); bool slavesExist() { return mSlaves.size() || mMaster; } + int slaveId() { return mSlaveId; } // Getters / setters for parameters DeviceType GetDeviceType() const; @@ -339,6 +340,7 @@ class GPUReconstruction GPUReconstruction* mMaster = nullptr; // Ptr to a GPUReconstruction object serving as master, sharing GPU memory, events, etc. std::vector mSlaves; // Ptr to slave GPUReconstructions + int mSlaveId = -1; // Id of this slave (-1 for master) // Others bool mInitialized = false; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index a3f9b996e070d..f8d4165477220 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -378,7 +378,8 @@ int32_t GPUChainTracking::Init() } if (GetProcessingSettings().debugLevel >= 6) { - mDebugFile->open(mRec->IsGPU() ? "GPU.out" : "CPU.out"); + std::string filename = std::string(mRec->IsGPU() ? "GPU" : "CPU") + (mRec->slaveId() != -1 ? (std::string("_slave") + std::to_string(mRec->slaveId())) : std::string(mRec->slavesExist() ? "_master" : "")) + ".out"; + mDebugFile->open(filename.c_str()); } return 0; From 689970d53e9172450ec9ee1b0c14e125df741e8a Mon Sep 17 00:00:00 2001 From: jokonig Date: Wed, 7 May 2025 15:09:50 +0200 Subject: [PATCH 0324/1764] [EMCAL-539] Fix in trigger simulation to not accept every event as MB - Only events where EMCal is triggered should be given the trigger flag. 
This was previously not the case --- .../include/EMCALSimulation/Digitizer.h | 1 + .../EMCALSimulation/DigitsWriteoutBuffer.h | 17 +++++++++++++++-- .../EMCAL/workflow/src/EMCALDigitizerSpec.cxx | 12 +++++++----- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/Detectors/EMCAL/simulation/include/EMCALSimulation/Digitizer.h b/Detectors/EMCAL/simulation/include/EMCALSimulation/Digitizer.h index ad296a4d65a58..66f85184c98e6 100644 --- a/Detectors/EMCAL/simulation/include/EMCALSimulation/Digitizer.h +++ b/Detectors/EMCAL/simulation/include/EMCALSimulation/Digitizer.h @@ -72,6 +72,7 @@ class Digitizer : public TObject double getEventTime() const { return mDigits.getEventTime(); } bool isLive(double t) const { return mDigits.isLive(t); } bool isLive() const { return mDigits.isLive(); } + bool isCurrentEventTriggered() const { return mDigits.isCurrentEventTriggered(); } void setDebugStreaming(bool doStreaming) { mEnableDebugStreaming = doStreaming; } diff --git a/Detectors/EMCAL/simulation/include/EMCALSimulation/DigitsWriteoutBuffer.h b/Detectors/EMCAL/simulation/include/EMCALSimulation/DigitsWriteoutBuffer.h index b6f486ddf2add..5713f2ef18ad9 100644 --- a/Detectors/EMCAL/simulation/include/EMCALSimulation/DigitsWriteoutBuffer.h +++ b/Detectors/EMCAL/simulation/include/EMCALSimulation/DigitsWriteoutBuffer.h @@ -60,8 +60,21 @@ class DigitsWriteoutBuffer double getTriggerTime() const { return mTriggerTime; } double getEventTime() const { return mLastEventTime; } - bool isLive(double t) const { return ((t - mTriggerTime) < mLiveTime || (t - mTriggerTime) >= (mLiveTime + mBusyTime - mPreTriggerTime)); } - bool isLive() const { return ((mLastEventTime - mTriggerTime) < mLiveTime || (mLastEventTime - mTriggerTime) >= (mLiveTime + mBusyTime - mPreTriggerTime)); } + bool isLive(double t) const + { + return ((t - mTriggerTime) < mLiveTime || (t - mTriggerTime) >= (mLiveTime + mBusyTime - mPreTriggerTime)); + } + bool isLive() const + { + return ((mLastEventTime - 
mTriggerTime) < (mLiveTime - mPreTriggerTime) || (mLastEventTime - mTriggerTime) >= (mLiveTime + mBusyTime - mPreTriggerTime)); + } + + /// Check if current collision was triggered + /// \return true if event was triggered + bool isCurrentEventTriggered() const + { + return mLastEventTime == mTriggerTime; + } // function returns true if the collision occurs 600ns before the readout window is open // Look here for more details https://alice.its.cern.ch/jira/browse/EMCAL-681 diff --git a/Detectors/EMCAL/workflow/src/EMCALDigitizerSpec.cxx b/Detectors/EMCAL/workflow/src/EMCALDigitizerSpec.cxx index 5de966d1b6a4c..cabdb2c74d818 100644 --- a/Detectors/EMCAL/workflow/src/EMCALDigitizerSpec.cxx +++ b/Detectors/EMCAL/workflow/src/EMCALDigitizerSpec.cxx @@ -332,15 +332,17 @@ void DigitizerSpec::run(framework::ProcessingContext& ctx) if (!trigger.any()) { continue; } - // Trigger sim: Prepare CTP input digit - acceptedTriggers.push_back(std::make_tuple(timesview[collID], trigger)); - LOG(debug) << "EMCAL TRU simulation: Sending trg = " << trigger << " to CTP"; mDigitizer.setEventTime(timesview[collID], trigger.any()); - - if (!mDigitizer.isLive()) { + if (!mDigitizer.isCurrentEventTriggered()) { + LOG(debug) << "reject collision"; continue; } + LOG(debug) << "accept collision"; + + // Trigger sim: Prepare CTP input digit + acceptedTriggers.push_back(std::make_tuple(timesview[collID], trigger)); + LOG(debug) << "EMCAL TRU simulation: Sending trg = " << trigger << " to CTP"; // for each collision, loop over the constituents event and source IDs // (background signal merging is basically taking place here) From 440e50040510d18bcc9742e75bfc2a4898a17c8c Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 8 May 2025 18:18:01 +0200 Subject: [PATCH 0325/1764] =?UTF-8?q?Revert=20"DPL=20Analysis:=20prevent?= =?UTF-8?q?=20slice=20cache=20from=20updating=20when=20not=20required=20by?= =?UTF-8?q?=20=E2=80=A6"=20(#14252)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- Framework/Core/include/Framework/ASoA.h | 12 ++--- .../Core/include/Framework/AnalysisManagers.h | 14 ++---- .../Core/include/Framework/AnalysisTask.h | 22 ++++----- .../Framework/ArrowTableSlicingCache.h | 45 ++++++----------- .../Core/include/Framework/GroupSlicer.h | 2 +- Framework/Core/src/ASoA.cxx | 2 +- Framework/Core/src/ArrowSupport.cxx | 19 ++++--- Framework/Core/src/ArrowTableSlicingCache.cxx | 49 +++++++++---------- Framework/Core/test/test_GroupSlicer.cxx | 4 +- 9 files changed, 73 insertions(+), 96 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 2e478a8ca64a6..e098cd89f6d5d 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1400,10 +1400,10 @@ namespace o2::framework struct PreslicePolicyBase { const std::string binding; - Entry bindingKey; + StringPair bindingKey; bool isMissing() const; - Entry const& getBindingKey() const; + StringPair const& getBindingKey() const; }; struct PreslicePolicySorted : public PreslicePolicyBase { @@ -1428,7 +1428,7 @@ struct PresliceBase : public Policy { const std::string binding; PresliceBase(expressions::BindingNode index_) - : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, Entry(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} + : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, std::make_pair(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} { } @@ -1508,7 +1508,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); } } uint64_t offset = 0; @@ -1545,7 +1545,7 @@ 
auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); } } auto selection = container.getSliceFor(value); @@ -1574,7 +1574,7 @@ auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase().data(), container.bindingKey.key.c_str()); + missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.second.c_str()); } } uint64_t offset = 0; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index e310f3eef990c..330eaf01f0be4 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -534,43 +534,39 @@ static void setGroupedCombination(C& comb, TG& grouping, std::tuple& asso /// Preslice handling template requires(!is_preslice) -bool registerCache(T&, Cache&, Cache&) +bool registerCache(T&, std::vector&, std::vector&) { return false; } template requires std::same_as -bool registerCache(T& preslice, Cache& bsks, Cache&) +bool registerCache(T& preslice, std::vector& bsks, std::vector&) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); + auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); if (locate == bsks.end()) { bsks.emplace_back(preslice.getBindingKey()); - } else if (locate->enabled == false) { - locate->enabled = true; } return true; } template requires std::same_as -bool registerCache(T& preslice, Cache&, Cache& bsksU) +bool 
registerCache(T& preslice, std::vector&, std::vector& bsksU) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); if (locate == bsksU.end()) { bsksU.emplace_back(preslice.getBindingKey()); - } else if (locate->enabled == false) { - locate->enabled = true; } return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index 9bd2e2af173cc..c7f3da1948c62 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -66,20 +66,20 @@ concept is_enumeration = is_enumeration_v>; namespace { struct AnalysisDataProcessorBuilder { template - static void addGroupingCandidates(Cache& bk, Cache& bku, bool enabled) + static void addGroupingCandidates(std::vector& bk, std::vector& bku) { - [&bk, &bku, enabled](framework::pack) mutable { + [&bk, &bku](framework::pack) mutable { std::string key; if constexpr (soa::is_iterator>) { key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); } - ([&bk, &bku, &key, enabled]() mutable { + ([&bk, &bku, &key]() mutable { if constexpr (soa::relatedByIndex, std::decay_t>()) { auto binding = soa::getLabelFromTypeForKey>(key); if constexpr (o2::soa::is_smallgroups>) { - framework::updatePairList(bku, binding, key, enabled); + framework::updatePairList(bku, binding, key); } else { - framework::updatePairList(bk, binding, key, enabled); + framework::updatePairList(bk, binding, key); } } }(), @@ -147,7 +147,7 @@ struct AnalysisDataProcessorBuilder { /// helper to parse the process arguments /// 1. 
enumeration (must be the only argument) template - static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, Cache&, Cache&) + static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, std::vector&, std::vector&) { std::vector inputMetadata; // FIXME: for the moment we do not support begin, end and step. @@ -156,17 +156,17 @@ struct AnalysisDataProcessorBuilder { /// 2. grouping case - 1st argument is an iterator template - static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, Cache& bk, Cache& bku) + static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) requires(std::is_lvalue_reference_v && (std::is_lvalue_reference_v && ...)) { - addGroupingCandidates(bk, bku, value); + addGroupingCandidates(bk, bku); constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions::parent_t, Args...>(hash, name, value, inputs, eInfos); } /// 3. generic case template - static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, Cache&, Cache&) + static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector&, std::vector&) requires(std::is_lvalue_reference_v && ...) { constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); @@ -480,8 +480,8 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) std::vector inputs; std::vector options; std::vector expressionInfos; - Cache bindingsKeys; - Cache bindingsKeysUnsorted; + std::vector bindingsKeys; + std::vector bindingsKeysUnsorted; /// make sure options and configurables are set before expression infos are created homogeneous_apply_refs([&options, &hash](auto& element) { return analysis_task_parsers::appendOption(options, element); }, *task.get()); diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 292a67023fc5e..2edc23a63ce76 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -34,64 +34,51 @@ struct SliceInfoUnsortedPtr { gsl::span getSliceFor(int value) const; }; -struct Entry { - std::string binding; - std::string key; - bool enabled; - - Entry(std::string b, std::string k, bool e = true) - : binding{b}, - key{k}, - enabled{e} - { - } -}; - -using Cache = std::vector; +using StringPair = std::pair; -void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled); +void updatePairList(std::vector& list, std::string const& binding, std::string const& key); struct ArrowTableSlicingCacheDef { constexpr static ServiceKind service_kind = ServiceKind::Global; - Cache bindingsKeys; - Cache bindingsKeysUnsorted; + std::vector bindingsKeys; + std::vector bindingsKeysUnsorted; - void setCaches(Cache&& bsks); - void setCachesUnsorted(Cache&& bsks); + void setCaches(std::vector&& bsks); + void setCachesUnsorted(std::vector&& bsks); }; struct ArrowTableSlicingCache { constexpr static ServiceKind service_kind = ServiceKind::Stream; - Cache bindingsKeys; + std::vector bindingsKeys; std::vector>> values; std::vector>> counts; - Cache bindingsKeysUnsorted; + std::vector bindingsKeysUnsorted; std::vector> valuesUnsorted; std::vector groups; - ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted = {}); + 
ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted = {}); // set caching information externally - void setCaches(Cache&& bsks, Cache&& bsksUnsorted = {}); + void setCaches(std::vector&& bsks, std::vector&& bsksUnsorted = {}); // update slicing info cache entry (assumes it is already present) arrow::Status updateCacheEntry(int pos, std::shared_ptr const& table); arrow::Status updateCacheEntryUnsorted(int pos, std::shared_ptr const& table); // helper to locate cache position - std::pair getCachePos(Entry const& bindingKey) const; - int getCachePosSortedFor(Entry const& bindingKey) const; - int getCachePosUnsortedFor(Entry const& bindingKey) const; + std::pair getCachePos(StringPair const& bindingKey) const; + int getCachePosSortedFor(StringPair const& bindingKey) const; + int getCachePosUnsortedFor(StringPair const& bindingKey) const; // get slice from cache for a given value - SliceInfoPtr getCacheFor(Entry const& bindingKey) const; - SliceInfoUnsortedPtr getCacheUnsortedFor(Entry const& bindingKey) const; + SliceInfoPtr getCacheFor(StringPair const& bindingKey) const; + SliceInfoUnsortedPtr getCacheUnsortedFor(StringPair const& bindingKey) const; SliceInfoPtr getCacheForPos(int pos) const; SliceInfoUnsortedPtr getCacheUnsortedForPos(int pos) const; - static void validateOrder(Entry const& bindingKey, std::shared_ptr const& input); + static void validateOrder(StringPair const& bindingKey, std::shared_ptr const& input); }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index b8436314b057e..64b1d863c59e6 100644 --- a/Framework/Core/include/Framework/GroupSlicer.h +++ b/Framework/Core/include/Framework/GroupSlicer.h @@ -55,7 +55,7 @@ struct GroupSlicer { { constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); - auto bk = Entry(binding, mIndexColumnName); + auto bk = 
std::make_pair(binding, mIndexColumnName); if constexpr (!o2::soa::is_smallgroups>) { if (table.size() == 0) { return; diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index 5940bc0427225..810398747de88 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -197,7 +197,7 @@ bool PreslicePolicyBase::isMissing() const return binding == "[MISSING]"; } -Entry const& PreslicePolicyBase::getBindingKey() const +StringPair const& PreslicePolicyBase::getBindingKey() const { return bindingKey; } diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 3b13e30581f70..12a4c7131e828 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -567,27 +567,26 @@ o2::framework::ServiceSpec ArrowSupport::arrowTableSlicingCacheSpec() .name = "arrow-slicing-cache", .uniqueId = CommonServices::simpleServiceId(), .init = [](ServiceRegistryRef services, DeviceState&, fair::mq::ProgOptions&) { return ServiceHandle{TypeIdHelpers::uniqueId(), - new ArrowTableSlicingCache(Cache{services.get().bindingsKeys}, - Cache{services.get().bindingsKeysUnsorted}), + new ArrowTableSlicingCache(std::vector>{services.get().bindingsKeys}, std::vector{services.get().bindingsKeysUnsorted}), ServiceKind::Stream, typeid(ArrowTableSlicingCache).name()}; }, .configure = CommonServices::noConfiguration(), .preProcessing = [](ProcessingContext& pc, void* service_ptr) { auto* service = static_cast(service_ptr); auto& caches = service->bindingsKeys; - for (auto i = 0u; i < caches.size(); ++i) { - if (caches[i].enabled && pc.inputs().getPos(caches[i].binding.c_str()) >= 0) { - auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].binding.c_str())->asArrowTable()); + for (auto i = 0; i < caches.size(); ++i) { + if (pc.inputs().getPos(caches[i].first.c_str()) >= 0) { + auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].first.c_str())->asArrowTable()); if (!status.ok()) { - 
throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].binding.c_str(), caches[i].key.c_str()); + throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].first.c_str(), caches[i].second.c_str()); } } } auto& unsortedCaches = service->bindingsKeysUnsorted; - for (auto i = 0u; i < unsortedCaches.size(); ++i) { - if (unsortedCaches[i].enabled && pc.inputs().getPos(unsortedCaches[i].binding.c_str()) >= 0) { - auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].binding.c_str())->asArrowTable()); + for (auto i = 0; i < unsortedCaches.size(); ++i) { + if (pc.inputs().getPos(unsortedCaches[i].first.c_str()) >= 0) { + auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].first.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].binding.c_str(), unsortedCaches[i].key.c_str()); + throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].first.c_str(), unsortedCaches[i].second.c_str()); } } } }, diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 12df5ef6c080b..4b31f96e32fba 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -11,7 +11,6 @@ #include "Framework/ArrowTableSlicingCache.h" #include "Framework/RuntimeError.h" -#include "Framework/Logger.h" #include #include @@ -20,10 +19,10 @@ namespace o2::framework { -void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled = true) +void updatePairList(std::vector& list, std::string const& binding, std::string const& key) { - if (std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.binding == binding) && (entry.key == key); }) == list.end()) { - list.emplace_back(binding, key, enabled); + if 
(std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.first == binding) && (entry.second == key); }) == list.end()) { + list.emplace_back(binding, key); } } @@ -66,17 +65,17 @@ gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const return {(*groups)[value].data(), (*groups)[value].size()}; } -void ArrowTableSlicingCacheDef::setCaches(Cache&& bsks) +void ArrowTableSlicingCacheDef::setCaches(std::vector&& bsks) { bindingsKeys = bsks; } -void ArrowTableSlicingCacheDef::setCachesUnsorted(Cache&& bsks) +void ArrowTableSlicingCacheDef::setCachesUnsorted(std::vector&& bsks) { bindingsKeysUnsorted = bsks; } -ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted) +ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted) : bindingsKeys{bsks}, bindingsKeysUnsorted{bsksUnsorted} { @@ -87,7 +86,7 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorte groups.resize(bindingsKeysUnsorted.size()); } -void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) +void ArrowTableSlicingCache::setCaches(std::vector&& bsks, std::vector&& bsksUnsorted) { bindingsKeys = bsks; bindingsKeysUnsorted = bsksUnsorted; @@ -112,7 +111,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, - arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].key)}, + arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].second)}, &options)); auto pair = static_cast(value_counts.array()); values[pos].reset(); @@ -129,11 +128,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (table->num_rows() == 0) { return arrow::Status::OK(); } - auto& [b, k, e] = bindingsKeysUnsorted[pos]; - if (!e) { - LOG(debug) 
<< "Update of disabled cache requested"; - return arrow::Status::OK(); - } + auto& [b, k] = bindingsKeysUnsorted[pos]; auto column = table->GetColumnByName(k); auto row = 0; for (auto iChunk = 0; iChunk < column->num_chunks(); ++iChunk) { @@ -144,7 +139,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (std::find(valuesUnsorted[pos].begin(), valuesUnsorted[pos].end(), v) == valuesUnsorted[pos].end()) { valuesUnsorted[pos].push_back(v); } - if ((int)groups[pos].size() <= v) { + if (groups[pos].size() <= v) { groups[pos].resize(v + 1); } (groups[pos])[v].push_back(row); @@ -156,7 +151,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st return arrow::Status::OK(); } -std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey) const +std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindingKey) const { auto pos = getCachePosSortedFor(bindingKey); if (pos != -1) { @@ -166,41 +161,41 @@ std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey if (pos != -1) { return {pos, false}; } - throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); } -int ArrowTableSlicingCache::getCachePosSortedFor(Entry const& bindingKey) const +int ArrowTableSlicingCache::getCachePosSortedFor(StringPair const& bindingKey) const { - auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); + auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); if (locate != bindingsKeys.end()) { return std::distance(bindingsKeys.begin(), locate); } return -1; } -int 
ArrowTableSlicingCache::getCachePosUnsortedFor(Entry const& bindingKey) const +int ArrowTableSlicingCache::getCachePosUnsortedFor(StringPair const& bindingKey) const { - auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); + auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); if (locate_unsorted != bindingsKeysUnsorted.end()) { return std::distance(bindingsKeysUnsorted.begin(), locate_unsorted); } return -1; } -SliceInfoPtr ArrowTableSlicingCache::getCacheFor(Entry const& bindingKey) const +SliceInfoPtr ArrowTableSlicingCache::getCacheFor(StringPair const& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (!s) { - throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); } return getCacheForPos(p); } -SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const Entry& bindingKey) const +SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const StringPair& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (s) { - throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); } return getCacheUnsortedForPos(p); @@ -229,9 +224,9 @@ SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedForPos(int pos) con }; } -void ArrowTableSlicingCache::validateOrder(Entry const& bindingKey, const std::shared_ptr& input) +void ArrowTableSlicingCache::validateOrder(StringPair const& bindingKey, const std::shared_ptr& input) { - 
auto const& [target, key, enabled] = bindingKey; + auto const& [target, key] = bindingKey; auto column = input->GetColumnByName(key); auto array0 = static_cast>(column->chunk(0)->data()); int32_t prev = 0; diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 091c21eeae229..161939141e790 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -683,7 +683,7 @@ TEST_CASE("ArrowDirectSlicing") std::vector slices; std::vector offsts; - auto bk = Entry(soa::getLabelFromType(), "fID"); + auto bk = std::make_pair(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); auto s = cache.updateCacheEntry(0, {evtTable}); auto lcache = cache.getCacheFor(bk); @@ -741,7 +741,7 @@ TEST_CASE("TestSlicingException") } auto evtTable = builderE.finalize(); - auto bk = Entry(soa::getLabelFromType(), "fID"); + auto bk = std::make_pair(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); try { From 9647d481ee3e039dec060e2f005a49617e21d4e5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 00:18:02 +0200 Subject: [PATCH 0326/1764] GPU: Add debug dumps for compressed / uncompressed TPC clusters --- GPU/GPUTracking/CMakeLists.txt | 1 + .../DataCompression/GPUTPCCompression.cxx | 56 +++++++++++++ .../DataCompression/GPUTPCCompression.h | 4 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Global/GPUChain.h | 31 +++----- GPU/GPUTracking/Global/GPUChainTracking.h | 1 + .../Global/GPUChainTrackingClusterizer.cxx | 19 ++--- .../Global/GPUChainTrackingCompression.cxx | 3 + .../Global/GPUChainTrackingDebug.h | 79 +++++++++++++++++++ .../GPUChainTrackingDebugAndProfiling.cxx | 15 ++++ .../Global/GPUChainTrackingMerger.cxx | 19 ++--- .../Global/GPUChainTrackingSectorTracker.cxx | 15 ++-- .../TPCClusterFinder/GPUTPCClusterFinder.h | 1 + .../GPUTPCClusterFinderDump.cxx | 2 +- 14 files changed, 201 insertions(+), 47 deletions(-) create mode 
100644 GPU/GPUTracking/Global/GPUChainTrackingDebug.h diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index b2852389398d0..2e26622d05291 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -138,6 +138,7 @@ set(HDRS_INSTALL Definitions/GPULogging.h Definitions/GPUSettingsList.h Global/GPUChainTrackingDefs.h + Global/GPUChainTrackingDebug.h Global/GPUChainTrackingGetters.inc Global/GPUErrorCodes.h Merger/GPUTPCGMBorderTrack.h diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 2a0c5b58d8a83..a107f749ddd77 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -130,3 +130,59 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters += 16 - (mMaxClusters % 16); } } + +void GPUTPCCompression::DumpCompressedClusters(std::ostream& out) +{ + const o2::tpc::CompressedClusters O = *mOutputFlat; + out << "\n\nCompressed Clusters:\n"; + out << O.nTracks << " Tracks\n"; + out << "Slice Row Clusters:\n"; + for (uint32_t i = 0; i < NSECTORS; i++) { + out << "Sector " << i << ": "; + for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + out << O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] << ", "; + } + out << "\n"; + } + out << "\nTrack Clusters:\n"; + for (uint32_t i = 0; i < O.nTracks; i++) { + if (i && i % 100 == 0) { + out << "\n"; + } + out << O.nTrackClusters[i] << ", "; + } + out << "\n\nUnattached Clusters\n"; + uint32_t offset = 0; + for (uint32_t i = 0; i < NSECTORS; i++) { + for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + out << "Sector " << i << " Row " << j << ": "; + for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) { + if (k && k % 10 == 0) { + out << "\n "; + } + const uint32_t l = k + offset; + out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " 
<< (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] "; + } + offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; + out << "\n"; + } + } + out << "\n\nAttached Clusters\n"; + offset = 0; + for (uint32_t i = 0; i < O.nTracks; i++) { + out << "Track " << i << ": {" << (uint32_t)O.qPtA[i] << ", " << (uint32_t)O.rowA[i] << ", " << (uint32_t)O.sliceA[i] << ", " << (uint32_t)O.timeA[i] << ", " << (uint32_t)O.padA[i] << "} - "; + for (uint32_t k = 0; k < O.nTrackClusters[i]; k++) { + if (k && k % 10 == 0) { + out << "\n "; + } + const uint32_t l1 = k + offset, l2 = k + offset - i; + out << "["; + if (k) { + out << (int32_t)O.rowDiffA[l2] << ", " << (int32_t)O.sliceLegDiffA[l2] << ", " << (uint32_t)O.padResA[l2] << ", " << (uint32_t)O.timeResA[l2] << ", "; + } + out << (uint32_t)O.qTotA[l1] << ", " << (uint32_t)O.qMaxA[l1] << ", " << (uint32_t)O.flagsA[l1] << ", " << (uint32_t)O.sigmaPadA[l1] << ", " << (uint32_t)O.sigmaTimeA[l1] << "] "; + } + offset += O.nTrackClusters[i]; + out << "\n"; + } +} diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index c1d9fe283fbea..52585b4c08b24 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -57,6 +57,10 @@ class GPUTPCCompression : public GPUProcessor GPUd() static void truncateSignificantBitsChargeMax(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QMAX); } GPUd() static void truncateSignificantBitsWidth(uint8_t& width, const GPUParam& param) { truncateSignificantBits(width, param.rec.tpc.sigBitsWidth, P_MAX_SIGMA); } +#ifndef GPUCA_GPUCODE + void DumpCompressedClusters(std::ostream& out); +#endif + protected: struct memory { uint32_t nStoredTracks = 0; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h 
b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 4c32c3e46e3a7..638a3ed43d2aa 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -295,7 +295,7 @@ AddOption(trdNCandidates, int32_t, 3, "", 0, "Number of branching track candidat AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GPU track model for TRD tracking") AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))") AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") -AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug output dumps to file") +AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6", def(1)) diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 5df324fcba648..aca1bb2420fb6 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -20,6 +20,7 @@ #include "GPUKernelClassesFwd.h" #include +#include namespace o2::gpu { @@ -226,12 +227,19 @@ class GPUChain virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; } template - bool DoDebugAndDump(RecoStep step, int32_t mask, T& processor, S T::*func, Args&&... args) + bool DoDebugAndDump(RecoStep step, uint32_t mask, T& processor, S T::*func, Args&&... 
args) { return DoDebugAndDump(step, mask, true, processor, func, args...); } template - bool DoDebugAndDump(RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args); + bool DoDebugAndDump(RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args); + template + bool DoDebugDump(uint32_t mask, std::function func, Args&... args); + template + bool DoDebugDump(uint32_t mask, S* func, Args&&... args) + { + return DoDebugDump(mask, std::function([&func](Args&&... args_tmp) { (*func)(args_tmp...); }), args...); + } template int32_t runRecoStep(RecoStep step, S T::*func, Args... args); @@ -278,24 +286,7 @@ inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... } template -bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args) -{ - if (GetProcessingSettings().keepAllMemory) { - if (transfer) { - TransferMemoryResourcesToHost(step, &processor, -1, true); - } - if (GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask))) { - if (func) { - (processor.*func)(args...); - } - return true; - } - } - return false; -} - -template -int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) +inline int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) { if (GetRecoSteps().isSet(step)) { auto* timer = GetProcessingSettings().recoTaskTiming ? 
&mRec->getRecoStepTimer(step) : nullptr; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 8664652b549e3..13773a97d4e3d 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -234,6 +234,7 @@ class GPUChainTracking : public GPUChain void PrepareDebugOutput(); void PrintDebugOutput(); void PrintOutputStat(); + static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters); bool ValidateSteps(); bool ValidateSettings(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index f188388e76a02..2cdd1bb76bf00 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -14,6 +14,7 @@ #include "GPUChainTracking.h" #include "GPUChainTrackingDefs.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" @@ -813,7 +814,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (fragment.index == 0) { runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); } - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererZeroedCharges, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); if (doGPU) { if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { @@ -900,7 +901,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (!mIOPtrs.tpcZS) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); } - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, 
clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererDigits, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { clusterer.DumpChargeMap(*mDebugFile, "Charges"); } @@ -919,13 +920,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Peaks"); } RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { uint32_t iSector = iSectorBase + lane; @@ -939,13 +940,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererSuppressedPeaks, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); } 
RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererSuppressedPeaks, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { uint32_t iSector = iSectorBase + lane; @@ -979,7 +980,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } // float time_clusterizer = 0, time_fill = 0, time_networks = 0; @@ -1092,7 +1093,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) #endif } else { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); } @@ -1111,7 +1112,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) 
TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); laneHasData[lane] = true; // Include clusters in default debug mask, exclude other debug output by default - DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererClusters, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off }); mRec->SetNActiveThreadsOuterLoop(1); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 24c74a661f18e..534c02a4c0a84 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUTrackingInputProvider.h" @@ -202,6 +203,7 @@ int32_t GPUChainTracking::RunTPCCompression() ((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec); } mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR")); + DoDebugAndDump(RecoStep::TPCCompression, GPUChainTrackingDebugFlags::TPCCompressedClusters, Compressor, &GPUTPCCompression::DumpCompressedClusters, *mDebugFile); return 0; } @@ -425,5 +427,6 @@ int32_t GPUChainTracking::RunTPCDecompression() } mRec->PopNonPersistentMemory(RecoStep::TPCDecompression, qStr2Tag("TPCDCMPR")); } + DoDebugDump(GPUChainTrackingDebugFlags::TPCDecompressedClusters, &GPUChainTracking::DumpClusters, *mDebugFile, mIOPtrs.clustersNative); return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h new file mode 100644 index 0000000000000..810f40a1d8654 --- /dev/null +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -0,0 +1,79 @@ +// Copyright 2019-2020 
CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUChainTrackingDEBUG.h +/// \author David Rohr + +#ifndef GPUCHAINTRACKINGDEBUG_H +#define GPUCHAINTRACKINGDEBUG_H + +#include +#include +#include + +namespace o2::gpu +{ +// NOTE: Values below 262144 are activated by default with --debug 6 in GPUSettingsList.h::debugMask +enum GPUChainTrackingDebugFlags : uint32_t { + TPCSectorTrackingData = 1, + TPCPreLinks = 2, + TPCLinks = 4, + TPCStartHits = 8, + TPCTracklets = 16, + TPCSectorTracks = 32, + TPCHitWeights = 256, + TPCCompressedClusters = 512, + TPCDecompressedClusters = 1024, + TPCMergingRanges = 2048, + TPCMergingSectorTracks = 4096, + TPCMergingMergedTracks = 8192, + TPCMergingCollectedTracks = 16384, + TPCMergingCE = 32768, + TPCMergingRefit = 65536, + TPCClustererClusters = 131072, + TPCClusterer = 262144, + TPCClustererDigits = 262144 << 1, + TPCClustererPeaks = 262144 << 2, + TPCClustererSuppressedPeaks = 262144 << 3, + TPCClustererChargeMap = 262144 << 4, + TPCClustererZeroedCharges = 262144 << 5 +}; + +template +inline bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args) +{ + if (GetProcessingSettings().keepAllMemory) { + if (transfer) { + TransferMemoryResourcesToHost(step, &processor, -1, true); + } + std::function lambda = [&processor, &func](Args&... 
args_tmp) { + if (func) { + (processor.*func)(args_tmp...); + } + }; + return DoDebugDump(mask, lambda, args...); + } + return false; +} + +template +inline bool GPUChain::DoDebugDump(uint32_t mask, std::function func, Args&... args) +{ + if (GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask))) { + func(args...); + return true; + } + return false; +} + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 53bdfbadd4b25..903505068ad2c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -333,3 +333,18 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster } } } + +void GPUChainTracking::DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters) +{ + out << "\nTPC Clusters:\n"; + for (uint32_t iSec = 0; iSec < GPUCA_NSECTORS; iSec++) { + out << "TPCClusters - Sector " << iSec << "\n"; + for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) { + out << " Row: " << i << ": " << clusters->nClusters[iSec][i] << " clusters:\n"; + for (uint32_t j = 0; j < clusters->nClusters[iSec][i]; j++) { + const auto& cl = clusters->clusters[iSec][i][j]; + out << " " << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n"; + } + } + } +} diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index a38148ccb375a..6e86be03e7950 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include 
"GPUDefParametersRuntime.h" #include "GPUO2DataTypes.h" @@ -72,7 +73,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); } } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRanges, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode); mRec->ReturnVolatileDeviceMemory(); } @@ -135,14 +136,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingSectorTracks, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -157,7 +158,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridBlk(std::max(2u, 
numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -167,14 +168,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel({{1, -WarpSize(), 0, deviceType}}, 1); runKernel({{1, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCollectedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); if (param().rec.tpc.mergeCE) { runKernel(GetGridAuto(0, deviceType), true); RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType); RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCE, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile); } int32_t waitForTransfer = 0; if (doGPU) { @@ -201,7 +202,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); if (doGPU) { 
CondWaitEvent(waitForTransfer, &mEvents->single); @@ -227,7 +228,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0)); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile); runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); @@ -240,7 +241,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); if (doGPU) { RecordMarker(&mEvents->single, 0); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 635641c00ae14..ef38d53173c2b 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" @@ -176,7 +177,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().debugLevel >= 6) { *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; - if (GetProcessingSettings().debugMask & 1) { + if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCSectorTrackingData) { if (doGPU) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); } @@ 
-191,13 +192,13 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().keepDisplayMemory) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); - if (GetProcessingSettings().debugMask & 2) { + if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCPreLinks) { trk.DumpLinks(*mDebugFile, 0); } } runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSector}}); - DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCLinks, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}}); if (mRec->getGPUParameters(doGPU).par_SORT_STARTHITS) { @@ -206,7 +207,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGrid(1, 1, useStream), {iSector}}); } - DoDebugAndDump(RecoStep::TPCSectorTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCStartHits, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { trk.UpdateMaxData(); @@ -215,8 +216,8 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } runKernel({GetGridAuto(useStream), {iSector}}); - DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCTracklets, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + if (GetProcessingSettings().debugMask & 
GPUChainTrackingDebugFlags::TPCHitWeights && GetProcessingSettings().deterministicGPUReconstruction < 2) { trk.DumpHitWeights(*mDebugFile); } @@ -230,7 +231,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); } - DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCSectorTracks, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); }); mRec->SetNActiveThreadsOuterLoop(1); if (error) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index 37399f5e4863f..35e2a7297338f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -36,6 +36,7 @@ class ConstMCTruthContainerView; namespace tpc { struct ClusterNative; +struct ClusterNativeAccess; class Digit; } // namespace tpc diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index da30375149b7c..d676cf9cd3887 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -157,7 +157,7 @@ void GPUTPCClusterFinder::DumpClusters(std::ostream& out) { out << "\nClusterer - Clusters - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; - for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { + for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) { size_t N = mPclusterInRow[i]; const tpc::ClusterNative* row = &mPclusterByRow[i * mNMaxClusterPerRow]; From f1fbd3547c5f944b35e114a04a06f8a80367dcea Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Thu, 8 May 2025 21:41:40 +0200 Subject: [PATCH 0327/1764] ctpdev: consistencycheck debug and ctpcfg config added (#14247) 
* dev: checkConsistency and ctp.cfg * dev: consistency checker * dev:ctpcfg to CCDB * clang * dev:ctpcfg * clang * fix * fix --------- Co-authored-by: Roman Lietava --- .../include/DataFormatsCTP/Configuration.h | 13 +++ .../Detectors/CTP/src/Configuration.cxx | 65 +++++++++++ .../Detectors/CTP/src/DataFormatsCTPLinkDef.h | 2 + .../CTPReconstruction/RawDataDecoder.h | 14 ++- .../CTP/reconstruction/src/RawDataDecoder.cxx | 107 ++++++++---------- Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 17 ++- .../include/CTPWorkflowScalers/RunManager.h | 2 +- .../CTPWorkflowScalers/ctpCCDBManager.h | 7 +- .../CTP/workflowScalers/src/RunManager.cxx | 10 +- .../CTP/workflowScalers/src/ctp-proxy.cxx | 7 +- .../workflowScalers/src/ctpCCDBManager.cxx | 30 +++++ 11 files changed, 205 insertions(+), 69 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h index 4ff0256f33827..fdd73986f1eaf 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h @@ -173,6 +173,8 @@ class CTPConfiguration uint64_t getDecrtiptorInputsMask(const std::string& name) const; std::map> getDet2InputMap(); uint64_t getTriggerClassMask() const; + uint64_t getTriggerClassMaskWInputs() const; + uint64_t getTriggerClassMaskWInputsNoTrgDets() const; std::vector getTriggerClassList() const; uint32_t getRunNumber() { return mRunNumber; }; std::vector getDetectorList() const; @@ -203,6 +205,17 @@ class CTPConfiguration std::ostream& operator<<(std::ostream& in, const CTPConfiguration& conf); +struct CtpCfg { + CtpCfg() = default; + std::string filename = "ctp.cfg"; + int readAndSave(std::string& path); + uint32_t TFOrbits = 0; + int ccdb = -1; // -1 means def constructor was called + uint32_t orbitShift = 0; + uint32_t irInputs_1_24 = 0; + uint32_t irInputs_25_48 = 0; + ClassDefNV(CtpCfg, 1) +}; } // namespace ctp } // 
namespace o2 #endif //_CTP_CONFIGURATION_H_ diff --git a/DataFormats/Detectors/CTP/src/Configuration.cxx b/DataFormats/Detectors/CTP/src/Configuration.cxx index 03f9b38db0e9f..38a49132db3d1 100644 --- a/DataFormats/Detectors/CTP/src/Configuration.cxx +++ b/DataFormats/Detectors/CTP/src/Configuration.cxx @@ -905,6 +905,30 @@ uint64_t CTPConfiguration::getTriggerClassMask() const } return clsmask; } +uint64_t CTPConfiguration::getTriggerClassMaskWInputs() const +{ + uint64_t clsmask = 0; + for (auto const& cls : mCTPClasses) { + if (cls.name.find("TRUE") != std::string::npos) { // ignoring internal ctp generators + continue; + } + clsmask |= cls.classMask; + } + return clsmask; +} +uint64_t CTPConfiguration::getTriggerClassMaskWInputsNoTrgDets() const +{ + uint64_t clsmask = 0; + for (auto const& cls : mCTPClasses) { + bool exclude = cls.name.find("TRUE") != std::string::npos; // ignoring internal ctp generators + exclude += cls.name.find("EMC") != std::string::npos; + exclude += cls.name.find("TRD") != std::string::npos; + exclude += cls.name.find("HMP") != std::string::npos; + if (!exclude) + clsmask |= cls.classMask; + } + return clsmask; +} // Hardware positions of classes std::vector CTPConfiguration::getTriggerClassList() const { @@ -1153,6 +1177,47 @@ int CTPInputsConfiguration::getInputIndexFromName(std::string& name) return 0xff; } +int CtpCfg::readAndSave(std::string& path) +{ + std::string file = path + filename; + std::ifstream ctpcfg(file); + if (ctpcfg.is_open()) { + std::string line; + while (std::getline(ctpcfg, line)) { + o2::utils::Str::trim(line); + if (line.size() == 0) { + continue; + } + if (line[0] == '#') { + continue; + } + std::vector tokens = o2::utils::Str::tokenize(line, ' '); + size_t ntokens = tokens.size(); + if (ntokens < 2) { + LOG(warn) << "Not enough tokens"; + continue; + } + if (tokens[0].find("TForbits") != std::string::npos) { + TFOrbits = std::atol(tokens[1].c_str()); + } else if (tokens[0].find("ccdb") != std::string::npos) { 
+ ccdb = std::atoi(tokens[1].c_str()); + } else if (tokens[0].find("orbitshift") != std::string::npos) { + orbitShift = std::atol(tokens[1].c_str()); + } else if (tokens[0].find("ir_inputs") != std::string::npos) { + irInputs_1_24 = std::stoul(tokens[2].c_str(), nullptr, 16); + irInputs_25_48 = std::stoul(tokens[1].c_str(), nullptr, 16); + } else { + LOG(warn) << " Token not found:" << tokens[0]; + } + } + LOG(warn) << "Open file success:" << file; + } else { + LOG(warn) << "Can not open file:" << file; + return 1; + } + return 0; +} + std::ostream& o2::ctp::operator<<(std::ostream& in, const o2::ctp::CTPConfiguration& conf) { conf.printStream(in); diff --git a/DataFormats/Detectors/CTP/src/DataFormatsCTPLinkDef.h b/DataFormats/Detectors/CTP/src/DataFormatsCTPLinkDef.h index da21f779723f8..ac2a83d31edda 100644 --- a/DataFormats/Detectors/CTP/src/DataFormatsCTPLinkDef.h +++ b/DataFormats/Detectors/CTP/src/DataFormatsCTPLinkDef.h @@ -55,4 +55,6 @@ #pragma link C++ class o2::ctp::TriggerOffsetsParam + ; #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::ctp::TriggerOffsetsParam> + ; +#pragma link C++ class o2::ctp::CtpCfg + ; + #endif diff --git a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h index 7579e9dc1d6f5..8ebc7e0304561 100644 --- a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h +++ b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h @@ -55,7 +55,12 @@ class RawDataDecoder int init(); static int shiftNew(const o2::InteractionRecord& irin, uint32_t TFOrbit, std::bitset<48>& inpmask, int64_t shift, int level, std::map& digmap); static int shiftInputs(std::map& digitsMap, o2::pmr::vector& digits, uint32_t TFOrbit, uint64_t trgclassmask = 0xffffffffffffffff); - int checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask = 0xffffffffffffffff); + int checkReadoutConsistentncy(o2::pmr::vector& 
digits, uint64_t trgclassmask = 0xffffffffffffffff, uint64_t trigclassmaskNoTrgDets = 0xffffffffffffffff); + std::array getClassErrorsA() { return mClassErrorsA; } + std::array getClassErrorsB() { return mClassErrorsB; } + std::array getClassCountersA() { return mClassCountersA; } + std::array getClassCountersB() { return mClassCountersB; } + int getLostDueToShift() { return mLostDueToShift; } private: static constexpr uint32_t TF_TRIGGERTYPE_MASK = 0x800; @@ -83,8 +88,13 @@ class RawDataDecoder // error verbosness int mErrorIR = 0; int mErrorTCR = 0; - int mErrorMax = 3; + int mErrorMax = 5; bool mStickyError = false; + std::array mClassErrorsA{}; + std::array mClassErrorsB{}; // from inputs + std::array mClassCountersA{}; + std::array mClassCountersB{}; // from inputs + int mLostDueToShift = 0; CTPConfiguration mCTPConfig; }; } // namespace ctp diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index fa7fd673c7e85..797dd0e300519 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -294,13 +294,17 @@ int RawDataDecoder::decodeRaw(o2::framework::InputRecord& inputs, std::vector& digitsMap, o2::pmr::vector& digits, uint32_t TFOrbit, uint64_t trgclassmask) { // int nClasswoInp = 0; // counting classes without input which should never happen - int nLM = 0; - int nL0 = 0; - int nL1 = 0; - int nTwI = 0; - int nTwoI = 0; - int nTwoIlost = 0; std::map digitsMapShifted; auto L0shift = o2::ctp::TriggerOffsetsParam::Instance().LM_L0; auto L1shift = L0shift + o2::ctp::TriggerOffsetsParam::Instance().L0_L1; @@ -593,86 +591,77 @@ int RawDataDecoder::shiftInputs(std::map& digit } } for (auto const& dig : digitsMapShifted) { - auto d = dig.second; - if ((d.CTPInputMask & LMMASKInputs).count()) { - nLM++; - } - if ((d.CTPInputMask & L0MASKInputs).count()) { - nL0++; - } - if ((d.CTPInputMask & L1MASKInputs).count()) { - nL1++; - } - if 
((d.CTPClassMask).to_ulong() & trgclassmask) { - if (d.CTPInputMask.count()) { - nTwI++; - } else { - if (d.intRecord.bc == (o2::constants::lhc::LHCMaxBunches - L1shift)) { // input can be lost because latency class-l1input = 1 - nTwoIlost++; - } else { - // LOG(error) << d.intRecord << " " << d.CTPClassMask << " " << d.CTPInputMask; - // std::cout << "ERROR:" << std::hex << d.CTPClassMask << " " << d.CTPInputMask << std::dec << std::endl; - nTwoI++; - } - } - } digits.push_back(dig.second); } - int ret = 0; - if (nTwoI) { // Trigger class wo Input - LOG(error) << "LM:" << nLM << " L0:" << nL0 << " L1:" << nL1 << " TwI:" << nTwI << " Trigger classes wo input:" << nTwoI; - ret = 64; - } - if (nTwoIlost) { - LOG(warn) << " Trigger classes wo input from diff latency 1:" << nTwoIlost; - } - return ret; + return 0; } // -int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask) +int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask, uint64_t trgclassmaskNoTrgDet) { + LOG(debug) << "Checking readout"; int ret = 0; - int lost = 0; + static int nerror = 0; for (auto const& digit : digits) { // if class mask => inps for (int i = 0; i < digit.CTPClassMask.size(); i++) { - if (digit.CTPClassMask[i] & trgclassmask) { + bool trgcls = trgclassmask & (1ull << i); + if (digit.CTPClassMask[i] & trgcls) { const CTPClass* cls = mCTPConfig.getCTPClassFromHWIndex(i); if (cls == nullptr) { - LOG(error) << "Class mask index not found in CTP config:" << i; + if (nerror < mErrorMax) { + LOG(error) << "Class mask index not found in CTP config:" << i; + nerror++; + } ret = 128; continue; } + mClassCountersA[i]++; + if (cls->descriptor == nullptr) + continue; uint64_t clsinpmask = cls->descriptor->getInputsMask(); uint64_t diginpmask = digit.CTPInputMask.to_ullong(); if (!((clsinpmask & diginpmask) == clsinpmask)) { - LOG(error) << "CTP class:" << cls->name << " inpmask:" << clsinpmask << " not compatible with inputs 
mask:" << diginpmask; + if (nerror < mErrorMax) { + LOG(error) << "Cls=>Inps: CTP class:" << cls->name << " inpmask:" << clsinpmask << " not compatible with inputs mask:" << diginpmask; + nerror++; + } + mClassErrorsA[i]++; ret = 128; } } } // if inps => class mask for (auto const& cls : mCTPConfig.getCTPClasses()) { - uint64_t clsinpmask = cls.descriptor->getInputsMask(); + // cls.printStream(std::cout); + if (cls.descriptor == nullptr) + continue; + uint64_t clsinpmask = cls.descriptor->getInputsMask(); // class definition uint64_t diginpmask = digit.CTPInputMask.to_ullong(); uint64_t digclsmask = digit.CTPClassMask.to_ullong(); if ((clsinpmask & diginpmask) == clsinpmask) { - if ((cls.classMask & digclsmask) == 0) { - int32_t BCShiftCorrection = -o2::ctp::TriggerOffsetsParam::Instance().customOffset[o2::detectors::DetID::CTP]; - int32_t offset = BCShiftCorrection + o2::ctp::TriggerOffsetsParam::Instance().LM_L0 + o2::ctp::TriggerOffsetsParam::Instance().L0_L1_classes - 1; - offset = o2::constants::lhc::LHCMaxBunches - offset; - if (digit.intRecord.bc < offset) { - LOG(error) << "CTP class:" << cls.name << " inpmask:" << clsinpmask << " cls mask:" << cls.classMask << " not found in digit:" << digit; - ret = 256; - } else { - lost++; + if (cls.classMask & trgclassmask) { + mClassCountersB[cls.getIndex()]++; + if ((cls.classMask & digclsmask) == 0) { + int32_t BCShiftCorrection = -o2::ctp::TriggerOffsetsParam::Instance().customOffset[o2::detectors::DetID::CTP]; + int32_t offset = BCShiftCorrection + o2::ctp::TriggerOffsetsParam::Instance().LM_L0 + o2::ctp::TriggerOffsetsParam::Instance().L0_L1_classes - 1; + offset = o2::constants::lhc::LHCMaxBunches - offset; + if (digit.intRecord.bc < offset) { + if ((nerror < mErrorMax) && (cls.classMask & ~trgclassmaskNoTrgDet)) { + LOG(info) << "Inp=>Cls: CTP class:" << cls.name << " inpmask:" << clsinpmask << " cls mask:" << cls.classMask << " not found in digit:" << digit; + nerror++; + } + mClassErrorsB[cls.getIndex()]++; + 
ret = 256; + } else { + mLostDueToShift++; + } } } } } } - if (lost) { - LOG(info) << "LOST classes because of shift:" << lost; + if (mLostDueToShift) { + LOG(debug) << "LOST classes because of shift:" << mLostDueToShift; } return ret; } diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 753f88114a14b..3f7c729b351a3 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -69,8 +69,21 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) o0 = TFOrbits[i]; } std::cout << std::endl; - std::cout << "Number of missing TF:" << nmiss << std::endl; - std::cout << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << mDecoder.getErrorTCR() << std::endl; + LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift(); + LOG(info) << "Number of missing TF:" << nmiss << std::endl; + if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) + LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << mDecoder.getErrorTCR() << std::endl; + std::array clsA = mDecoder.getClassCountersA(); + std::array clsB = mDecoder.getClassCountersB(); + std::array clsEA = mDecoder.getClassErrorsA(); + std::array clsEB = mDecoder.getClassErrorsB(); + + for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { + bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0; + if (clsEA[i]) + LOG(error) << " Class without inputs:"; + LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i]; + } } void RawDecoderSpec::run(framework::ProcessingContext& ctx) { diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h index 72fb9c2056367..6d2172e3da165 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h +++ 
b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h @@ -72,7 +72,7 @@ class CTPRunManager : public ctpCCDBManager int mEOX = 0; // redundancy check int mNew = 1; // 1 - no CCDB: used for QC int mQCWritePeriod = 3; // Time in 10secs between two writes to QCCD - ClassDefNV(CTPRunManager, 7); + ClassDefNV(CTPRunManager, 8); }; } // namespace ctp } // namespace o2 diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h index c968a83183624..4237ad4501fcc 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h +++ b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h @@ -29,11 +29,13 @@ class ctpCCDBManager int saveRunConfigToCCDB(CTPConfiguration* cfg, long timeStart); int saveSoxOrbit(uint32_t runNumber, uint32_t soxOrbit, long timeStart); int saveOrbitReset(long timeStamp); + int saveCtpCfg(uint32_t runNumber, long timeStamp); static CTPConfiguration getConfigFromCCDB(long timestamp, std::string run, bool& ok); static CTPConfiguration getConfigFromCCDB(long timestamp, std::string run); CTPRunScalers getScalersFromCCDB(long timestamp, std::string, bool& ok); static void setCCDBHost(std::string host) { mCCDBHost = host; }; static void setQCDBHost(std::string host) { mQCDBHost = host; }; + void setCtpCfgDir(std::string& ctpcfgdir) { mCtpCfgDir = ctpcfgdir; }; protected: /// Database constants @@ -46,7 +48,10 @@ class ctpCCDBManager const std::string mQCDBPathCTPScalers = "qc/CTP/Scalers"; const std::string mCCDBPathSoxOrbit = "CTP/Calib/FirstRunOrbit"; const std::string mCCDBPathOrbitReset = "CTP/Calib/OrbitReset"; - ClassDefNV(ctpCCDBManager, 1); + const std::string mCCDBPathCtpCfg = "CTP/Config/CtpCfg"; + std::string mCtpCfgDir; + + ClassDefNV(ctpCCDBManager, 2); }; } // namespace ctp } // namespace o2 diff --git a/Detectors/CTP/workflowScalers/src/RunManager.cxx 
b/Detectors/CTP/workflowScalers/src/RunManager.cxx index 5d0b906e28088..054505aea7ba6 100644 --- a/Detectors/CTP/workflowScalers/src/RunManager.cxx +++ b/Detectors/CTP/workflowScalers/src/RunManager.cxx @@ -87,6 +87,7 @@ void CTPRunManager::init() LOG(info) << "QCDB writing every:" << mQCWritePeriod << " 10 secs"; LOG(info) << "CCDB host:" << mCCDBHost; LOG(info) << "CTP vNew cfg:" << mNew; + LOG(info) << "ctp.cfg dir:" << mCtpCfgDir; LOG(info) << "CTPRunManager initialised."; } int CTPRunManager::loadRun(const std::string& cfg) @@ -106,7 +107,7 @@ int CTPRunManager::loadRun(const std::string& cfg) timeStamp = (tt * 1000.); LOG(info) << "Timestamp file:" << timeStamp; cfgmod = cfg.substr(pos, cfg.size()); - LOG(info) << "ctpcfg: using ctp time"; + LOG(info) << "ctpconfig: using ctp time"; } } CTPActiveRun* activerun = new CTPActiveRun; @@ -122,7 +123,9 @@ int CTPRunManager::loadRun(const std::string& cfg) // mRunsLoaded[runnumber] = activerun; saveRunConfigToCCDB(&activerun->cfg, timeStamp); - + if (mCtpCfgDir != "none") { + saveCtpCfg(runnumber, timeStamp); + } return 0; } int CTPRunManager::setRunConfigBK(uint32_t runNumber, const std::string& cfg) @@ -272,6 +275,9 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message } return ret; } + if (topic.find("rocnts") != std::string::npos) { + return 0; + } static int nerror = 0; if (topic.find("sox") != std::string::npos) { // get config diff --git a/Detectors/CTP/workflowScalers/src/ctp-proxy.cxx b/Detectors/CTP/workflowScalers/src/ctp-proxy.cxx index b2896215d1c6a..f8f8ad3c95fbb 100644 --- a/Detectors/CTP/workflowScalers/src/ctp-proxy.cxx +++ b/Detectors/CTP/workflowScalers/src/ctp-proxy.cxx @@ -46,13 +46,14 @@ #include "BookkeepingApi/BkpClient.h" using namespace o2::framework; using DetID = o2::detectors::DetID; -InjectorFunction dcs2dpl(std::string& ccdbhost, std::string& bkhost, std::string& qchost, int qcwriteperiod) +InjectorFunction dcs2dpl(std::string& ccdbhost, std::string& 
bkhost, std::string& qchost, int qcwriteperiod, std::string& ctpcfgdir) { auto runMgr = std::make_shared(); runMgr->setCCDBHost(ccdbhost); runMgr->setBKHost(bkhost); runMgr->setQCDBHost(qchost); runMgr->setQCWritePeriod(qcwriteperiod); + runMgr->setCtpCfgDir(ctpcfgdir); runMgr->init(); // runMgr->setClient(client); return [runMgr](TimingInfo&, ServiceRegistryRef const& services, fair::mq::Parts& parts, ChannelRetriever channelRetriever, size_t newTimesliceId, bool& stop) -> bool { @@ -78,6 +79,7 @@ void customize(std::vector& workflowOptions) workflowOptions.push_back(ConfigParamSpec{"ccdb-host", VariantType::String, "http://o2-ccdb.internal:8080", {"ccdb host"}}); workflowOptions.push_back(ConfigParamSpec{"bk-host", VariantType::String, "none", {"bk host"}}); workflowOptions.push_back(ConfigParamSpec{"qc-host", VariantType::String, "none", {"qc host"}}); + workflowOptions.push_back(ConfigParamSpec{"ctpcfg-dir", VariantType::String, "none", {"ctp.cfg file directory"}}); workflowOptions.push_back(ConfigParamSpec{"qc-writeperiod", VariantType::Int, 30, {"Period of writing to QCDB in units of 10secs, default = 30 (5 mins)"}}); } @@ -104,6 +106,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) std::string bkhost = config.options().get("bk-host"); std::string qchost = config.options().get("qc-host"); int qcwriteperiod = config.options().get("qc-writeperiod"); + std::string ctpcfgdir = config.options().get("ctpcfg-dir"); if (chan.empty()) { throw std::runtime_error("input channel is not provided"); } @@ -118,7 +121,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) std::move(ctpCountersOutputs), // this is just default, can be overriden by --ctp-config-proxy '--channel-config..' 
chan.c_str(), - dcs2dpl(ccdbhost, bkhost, qchost, qcwriteperiod)); + dcs2dpl(ccdbhost, bkhost, qchost, qcwriteperiod, ctpcfgdir)); ctpProxy.labels.emplace_back(DataProcessorLabel{"input-proxy"}); LOG(info) << "===> Proxy done"; WorkflowSpec workflow; diff --git a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx index cbe8fe5dd675f..58850d88eb2c6 100644 --- a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx +++ b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx @@ -167,6 +167,36 @@ int ctpCCDBManager::saveOrbitReset(long timeStamp) } return 0; } +int ctpCCDBManager::saveCtpCfg(uint32_t runNumber, long timeStart) +{ + if (mCCDBHost == "none") { + LOG(info) << "CtpCfg not written to CCDB none"; + return 0; + } + CtpCfg ctpcfg; + int ret = ctpcfg.readAndSave(mCtpCfgDir); + if (ret == 0) { + using namespace std::chrono_literals; + std::chrono::seconds days3 = 259200s; + std::chrono::seconds min10 = 600s; + long time3days = std::chrono::duration_cast(days3).count(); + long time10min = std::chrono::duration_cast(min10).count(); + long tmin = timeStart - time10min; + long tmax = timeStart + time3days; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + metadata["runNumber"] = std::to_string(runNumber); + api.init(mCCDBHost.c_str()); // or http://localhost:8080 for a local installation + // store abitrary user object in strongly typed manner + ret = api.storeAsTFileAny(&ctpcfg, mCCDBPathCtpCfg, metadata, tmin, tmax); + if (ret == 0) { + LOG(info) << "CtpCfg saved in ccdb:" << mCCDBHost << " tmin:" << tmin << " tmax:" << tmax; + } else { + LOG(error) << "CtpCfg Problem writing to database ret:" << ret; + } + } + return ret; +} CTPConfiguration ctpCCDBManager::getConfigFromCCDB(long timestamp, std::string run, bool& ok) { auto& mgr = o2::ccdb::BasicCCDBManager::instance(); From edea1644f2e9190af8aa8c50f372dc23a5902155 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 21:16:48 +0200 
Subject: [PATCH 0328/1764] GPU: Add debugSuffix option for debug files --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 6 +++--- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 638a3ed43d2aa..9e0aa32155f0d 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -296,6 +296,7 @@ AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GP AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))") AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file") +AddOption(debugLogSuffix, std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6", def(1)) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index f8d4165477220..c1c3e368ce90c 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -378,7 +378,7 @@ int32_t GPUChainTracking::Init() } if (GetProcessingSettings().debugLevel >= 6) { - std::string filename = std::string(mRec->IsGPU() ? "GPU" : "CPU") + (mRec->slaveId() != -1 ? 
(std::string("_slave") + std::to_string(mRec->slaveId())) : std::string(mRec->slavesExist() ? "_master" : "")) + ".out"; + std::string filename = std::string(mRec->IsGPU() ? "GPU" : "CPU") + (mRec->slaveId() != -1 ? (std::string("_slave") + std::to_string(mRec->slaveId())) : std::string(mRec->slavesExist() ? "_master" : "")) + GetProcessingSettings().debugLogSuffix + ".out"; mDebugFile->open(filename.c_str()); } @@ -838,7 +838,7 @@ int32_t GPUChainTracking::RunChainFinalize() int32_t iKey; do { - Sleep(10); + usleep(10000); if (GetProcessingSettings().eventDisplay->EnableSendKey()) { iKey = kbhit() ? getch() : 0; if (iKey == 27) { @@ -847,7 +847,7 @@ int32_t GPUChainTracking::RunChainFinalize() break; } else if (iKey) { while (GetProcessingSettings().eventDisplay->getSendKey() != 0) { - Sleep(1); + usleep(1000); } GetProcessingSettings().eventDisplay->setSendKey(iKey); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 5e7672022b3ff..035e257ca7952 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -47,8 +47,6 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" -#include "utils/linux_helpers.h" - using namespace o2::gpu; #include "GPUO2DataTypes.h" From 408bae4d0040288a7740d6b0b9328ea41e9585ff Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 20:53:50 +0200 Subject: [PATCH 0329/1764] GPU: Add sorting of tracks of attached compressed clusters in deterministic mode --- .../DataCompression/GPUTPCCompression.cxx | 26 ++++++------ GPU/GPUTracking/Global/GPUChainTracking.h | 1 + .../Global/GPUChainTrackingCompression.cxx | 4 ++ .../GPUChainTrackingDebugAndProfiling.cxx | 42 +++++++++++++++++++ 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index a107f749ddd77..82834a694d0ba 100644 --- 
a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -140,7 +140,7 @@ void GPUTPCCompression::DumpCompressedClusters(std::ostream& out) for (uint32_t i = 0; i < NSECTORS; i++) { out << "Sector " << i << ": "; for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - out << O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] << ", "; + out << (O.nSliceRowClusters ? O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] : 0) << ", "; } out << "\n"; } @@ -153,18 +153,20 @@ void GPUTPCCompression::DumpCompressedClusters(std::ostream& out) } out << "\n\nUnattached Clusters\n"; uint32_t offset = 0; - for (uint32_t i = 0; i < NSECTORS; i++) { - for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - out << "Sector " << i << " Row " << j << ": "; - for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) { - if (k && k % 10 == 0) { - out << "\n "; + if (O.nSliceRowClusters) { + for (uint32_t i = 0; i < NSECTORS; i++) { + for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + out << "Sector " << i << " Row " << j << ": "; + for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) { + if (k && k % 10 == 0) { + out << "\n "; + } + const uint32_t l = k + offset; + out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " << (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] "; } - const uint32_t l = k + offset; - out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " << (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] "; + offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; + out << "\n"; } - offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; - out << "\n"; } } out << "\n\nAttached Clusters\n"; @@ -175,7 +177,7 @@ void 
GPUTPCCompression::DumpCompressedClusters(std::ostream& out) if (k && k % 10 == 0) { out << "\n "; } - const uint32_t l1 = k + offset, l2 = k + offset - i; + const uint32_t l1 = offset + k, l2 = offset - i + k - 1; out << "["; if (k) { out << (int32_t)O.rowDiffA[l2] << ", " << (int32_t)O.sliceLegDiffA[l2] << ", " << (uint32_t)O.padResA[l2] << ", " << (uint32_t)O.timeResA[l2] << ", "; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 13773a97d4e3d..2a2996895dbcf 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -235,6 +235,7 @@ class GPUChainTracking : public GPUChain void PrintDebugOutput(); void PrintOutputStat(); static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters); + static void DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls); bool ValidateSteps(); bool ValidateSettings(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 534c02a4c0a84..3bcd2390eae52 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -203,6 +203,10 @@ int32_t GPUChainTracking::RunTPCCompression() ((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec); } mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR")); + if (GetProcessingSettings().deterministicGPUReconstruction) { + SynchronizeGPU(); + DebugSortCompressedClusters(Compressor.mOutputFlat); + } DoDebugAndDump(RecoStep::TPCCompression, GPUChainTrackingDebugFlags::TPCCompressedClusters, Compressor, &GPUTPCCompression::DumpCompressedClusters, *mDebugFile); return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 903505068ad2c..00cf127162b94 100644 --- 
a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE #include "bitmapfile.h" @@ -348,3 +349,44 @@ void GPUChainTracking::DumpClusters(std::ostream& out, const o2::tpc::ClusterNat } } } + +void GPUChainTracking::DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls) +{ + o2::tpc::CompressedClusters c = *cls; + std::vector sorted(c.nTracks), offsets(c.nTracks); + std::iota(sorted.begin(), sorted.end(), 0); + auto sorter = [&c](const auto a, const auto b) { + return std::tie(c.sliceA[a], c.rowA[a], c.timeA[a], c.padA[a], c.qPtA[a]) < + std::tie(c.sliceA[b], c.rowA[b], c.timeA[b], c.padA[b], c.qPtA[b]); + }; + std::sort(sorted.begin(), sorted.end(), sorter); + uint32_t offset = 0; + for (uint32_t i = 0; i < c.nTracks; i++) { + offsets[i] = offset; + offset += c.nTrackClusters[i]; + } + + auto sortArray = [&c, &sorted, &offsets](auto* src, size_t totalSize, auto getOffset, auto getSize) { + auto buf = std::make_unique[]>(totalSize); + memcpy(buf.get(), src, totalSize * sizeof(*src)); + uint32_t targetOffset = 0; + for (uint32_t i = 0; i < c.nTracks; i++) { + const uint32_t j = sorted[i]; + memcpy(src + targetOffset, buf.get() + getOffset(offsets[j], j), getSize(j) * sizeof(*src)); + targetOffset += getSize(j); + } + }; + auto sortMultiple = [&sortArray](size_t totalSize, auto getOffset, auto getSize, auto&&... 
arrays) { + (..., sortArray(std::forward(arrays), totalSize, getOffset, getSize)); + }; + auto getFullOffset = [](uint32_t off, uint32_t ind) { return off; }; + auto getReducedOffset = [](uint32_t off, uint32_t ind) { return off - ind; }; + auto getIndex = [](uint32_t off, uint32_t ind) { return ind; }; + auto getN = [&c](uint32_t j) { return c.nTrackClusters[j]; }; + auto getN1 = [&c](uint32_t j) { return c.nTrackClusters[j] - 1; }; + auto get1 = [](uint32_t j) { return 1; }; + + sortMultiple(c.nAttachedClusters, getFullOffset, getN, c.qTotA, c.qMaxA, c.flagsA, c.sigmaPadA, c.sigmaTimeA); + sortMultiple(c.nAttachedClustersReduced, getReducedOffset, getN1, c.rowDiffA, c.sliceLegDiffA, c.padResA, c.timeResA); + sortMultiple(c.nTracks, getIndex, get1, c.qPtA, c.rowA, c.sliceA, c.timeA, c.padA, c.nTrackClusters); // NOTE: This must be last, since nTrackClusters is used for handling the arrays above! +} From 64dd944342374edfef4da03b1e5b64c2cfb5bc3f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 23:26:22 +0200 Subject: [PATCH 0330/1764] GPU: Make GPUCommonAlgorithm::sortInBlock deterministic with GPUCA_DETERMINISTIC_MODE --- GPU/Common/GPUCommonAlgorithm.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index 417c9e0d1f8c1..d0643391246a8 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -283,21 +283,29 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp) #ifndef GPUCA_GPUCODE GPUCommonAlgorithm::sort(begin, end, comp); #else - int32_t n = end - begin; - for (int32_t i = 0; i < n; i++) { - for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { - int32_t offset = i % 2; - int32_t curPos = 2 * tIdx + offset; - int32_t nextPos = curPos + 1; - - if (nextPos < n) { - if (!comp(begin[curPos], begin[nextPos])) { - IterSwap(&begin[curPos], &begin[nextPos]); + 
GPUCA_DETERMINISTIC_CODE( // clang-format off + GPUbarrier(); + if (get_local_id(0) == 0) { + GPUCommonAlgorithm::sort(begin, end, comp); + } + GPUbarrier(); + , // !GPUCA_DETERMINISTIC_CODE + int32_t n = end - begin; + for (int32_t i = 0; i < n; i++) { + for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { + int32_t offset = i % 2; + int32_t curPos = 2 * tIdx + offset; + int32_t nextPos = curPos + 1; + + if (nextPos < n) { + if (!comp(begin[curPos], begin[nextPos])) { + IterSwap(&begin[curPos], &begin[nextPos]); + } } } + GPUbarrier(); } - GPUbarrier(); - } + ) // clang-format on #endif } From fff75cfa60148feb0a51b524e956aeec7167a572 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 23:51:00 +0200 Subject: [PATCH 0331/1764] GPU: Use total sorting in deterministic mode for unattached clusters --- .../GPUTPCCompressionKernels.cxx | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 5dbbf63ca8264..3b88c8764d0fd 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -148,19 +148,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread -GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<0>::operator()(uint32_t a, uint32_t b) const +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::operator()(uint32_t a, uint32_t b) const { return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked(); } template <> -GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<1>::operator()(uint32_t a, uint32_t b) const +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::operator()(uint32_t a, uint32_t b) const { return mClsPtr[a].padPacked < mClsPtr[b].padPacked; } template <> -GPUd() bool 
GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<2>::operator()(uint32_t a, uint32_t b) const +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::operator()(uint32_t a, uint32_t b) const { if (mClsPtr[a].getTimePacked() >> 3 == mClsPtr[b].getTimePacked() >> 3) { return mClsPtr[a].padPacked < mClsPtr[b].padPacked; @@ -169,7 +169,7 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<2>::opera } template <> -GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<3>::operator()(uint32_t a, uint32_t b) const +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::operator()(uint32_t a, uint32_t b) const { if (mClsPtr[a].padPacked >> 3 == mClsPtr[b].padPacked >> 3) { return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked(); @@ -177,6 +177,18 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<3>::opera return mClsPtr[a].padPacked < mClsPtr[b].padPacked; } +template <> // Deterministic comparison +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<4>::operator()(uint32_t a, uint32_t b) const +{ + if (mClsPtr[a].getTimePacked() != mClsPtr[b].getTimePacked()) { + return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked(); + } + if (mClsPtr[a].padPacked != mClsPtr[b].padPacked) { + return mClsPtr[a].padPacked < mClsPtr[b].padPacked; + } + return mClsPtr[a].qTot < mClsPtr[b].qTot; +} + template <> GPUdii() void GPUTPCCompressionKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors) { @@ -261,15 +273,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder 
== GPUSettings::SortPad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } + , // !GPUCA_DETERMINISTIC_CODE + if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } + ) // clang-format on GPUbarrier(); } From b1c76a5365321adbe8f61bd8085ccb258a6304f1 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Fri, 9 May 2025 11:40:19 +0200 Subject: [PATCH 0332/1764] fix: remove debug --- Detectors/CTP/reconstruction/src/RawDataDecoder.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index 797dd0e300519..74bd08ce943ee 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -302,8 +302,7 @@ int RawDataDecoder::decodeRaw(o2::framework::InputRecord& inputs, std::vector Date: Fri, 9 May 2025 12:18:39 +0200 Subject: [PATCH 0333/1764] Avoid including BasicCCDBManager in headers 
exposed to ROOT. (#14254) Root will happily embed references to curl.h and / or the kernel headers if found on the build machine and die if they are not there on the node. --- .../CTP/include/DataFormatsCTP/CTPRateFetcher.h | 12 +++++++----- DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h index 6b7802feb15ad..78c4245b16b20 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h @@ -14,14 +14,16 @@ #include -#include "CCDB/BasicCCDBManager.h" #include "DataFormatsParameters/GRPLHCIFData.h" #include "DataFormatsCTP/Configuration.h" #include "DataFormatsCTP/Scalers.h" -namespace o2 +namespace o2::ccdb { -namespace ctp +class BasicCCDBManager; +} + +namespace o2::ctp { class CTPRateFetcher @@ -54,7 +56,7 @@ class CTPRateFetcher o2::parameters::GRPLHCIFData mLHCIFdata{}; ClassDefNV(CTPRateFetcher, 1); }; -} // namespace ctp -} // namespace o2 +} // namespace o2::ctp + #endif // COMMON_CCDB_CTPRATEFETCHER_H_ diff --git a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx index 67e59aad3ea24..d899fcafec47d 100644 --- a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx +++ b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx @@ -10,6 +10,7 @@ // or submit itself to any jurisdiction. 
#include "DataFormatsCTP/CTPRateFetcher.h" +#include "CCDB/BasicCCDBManager.h" #include #include From fb08c353c9118f73c75fd4a41c1545ca541aef57 Mon Sep 17 00:00:00 2001 From: ariedel-cern <85537041+ariedel-cern@users.noreply.github.com> Date: Fri, 9 May 2025 19:23:07 +0200 Subject: [PATCH 0334/1764] TPC QC: Add getter for occupancy to Cluster class (#14251) Feat: add getOccupancy method to Cluster class --- Detectors/TPC/qc/include/TPCQC/Clusters.h | 6 ++++-- Detectors/TPC/qc/src/Clusters.cxx | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Detectors/TPC/qc/include/TPCQC/Clusters.h b/Detectors/TPC/qc/include/TPCQC/Clusters.h index ca9c53a43b3f6..b61c27e8bf0e6 100644 --- a/Detectors/TPC/qc/include/TPCQC/Clusters.h +++ b/Detectors/TPC/qc/include/TPCQC/Clusters.h @@ -17,10 +17,10 @@ #ifndef AliceO2_TPC_CLUSTERS_H #define AliceO2_TPC_CLUSTERS_H -//root includes +// root includes #include "TCanvas.h" -//o2 includes +// o2 includes #include "TPCBase/CalDet.h" #include "TPCBase/Sector.h" #include "DataFormatsTPC/Defs.h" @@ -75,6 +75,8 @@ class Clusters CalPad& getSigmaPad() { return mSigmaPad; } CalPad& getTimeBin() { return mTimeBin; } + CalPad getOccupancy(int nHBFPerTF = 32); + void endTF() { ++mProcessedTFs; } size_t getProcessedTFs() { return mProcessedTFs; } diff --git a/Detectors/TPC/qc/src/Clusters.cxx b/Detectors/TPC/qc/src/Clusters.cxx index 3a7b1d8481b16..4bf59ced195ed 100644 --- a/Detectors/TPC/qc/src/Clusters.cxx +++ b/Detectors/TPC/qc/src/Clusters.cxx @@ -22,8 +22,10 @@ #include "TPCBase/ROC.h" #include "TPCBase/CRU.h" #include "TPCBase/Mapper.h" +#include "TPCBase/ParameterElectronics.h" #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/KrCluster.h" +#include "CommonConstants/LHCConstants.h" ClassImp(o2::tpc::qc::Clusters); @@ -152,6 +154,13 @@ void Clusters::reset() mProcessedTFs = 0; } +//______________________________________________________________________________ +o2::tpc::CalPad Clusters::getOccupancy(int 
nHBFPerTF) +{ + o2::tpc::CalPad occupancy = mNClusters; + occupancy /= float(mProcessedTFs * (o2::constants::lhc::LHCMaxBunches * nHBFPerTF) / float(o2::tpc::ParameterElectronics::TIMEBININBC)); + return occupancy; +} //______________________________________________________________________________ void Clusters::merge(Clusters& clusters) { From a917b6b5c387e4a6ec2a232b317d5e7468b99091 Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 9 May 2025 19:56:48 +0200 Subject: [PATCH 0335/1764] Add TPC occupancy in 10bins/drift in trackStudy --- .../study/src/TrackingStudy.cxx | 76 ++++++++++++------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index f206c43f7f57a..a74349bdeba15 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -95,7 +95,7 @@ class TrackingStudySpec : public Task std::unique_ptr mDBGOut; std::unique_ptr mDBGOutVtx; std::unique_ptr mTPCRefitter; ///< TPC refitter used for TPC tracks refit during the reconstruction - std::vector mTBinClOccAft, mTBinClOccBef, mTBinClOccWgh; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting/preceding from the TB = i*mNTPCOccBinLength + std::vector mMltHistTB, mTBinClOccAft, mTBinClOccBef, mTBinClOccWgh; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting/preceding from the TB = i*mNTPCOccBinLength std::unique_ptr mOccWghFun; float mITSROFrameLengthMUS = 0.f; float mTPCTBinMUS = 0.f; // TPC bin in microseconds @@ -107,6 +107,7 @@ class TrackingStudySpec : public Task float mMinX = 46.; float mMaxEta = 0.8; float mMinPt = 0.1; + int mNOccBinsDrift = 10; int mMinTPCClusters = 60; int mNTPCOccBinLength = 0; ///< TPC occ. 
histo bin length in TBs int mNHBPerTF = 0; @@ -142,6 +143,10 @@ void TrackingStudySpec::init(InitContext& ic) mDCAYFormula = ic.options().get("dcay-vs-pt"); mDCAZFormula = ic.options().get("dcaz-vs-pt"); mDoPairsCorr = ic.options().get("pair-correlations"); + mNOccBinsDrift = ic.options().get("noccbins"); + if (mNOccBinsDrift < 3) { + mNOccBinsDrift = 3; + } auto str = ic.options().get("occ-weight-fun"); if (!str.empty()) { mOccWghFun = std::make_unique("occFun", str.c_str(), -100., 100.); @@ -172,42 +177,23 @@ void TrackingStudySpec::run(ProcessingContext& pc) mTBinClOccAft.resize(nTPCOccBins); mTBinClOccBef.resize(nTPCOccBins); float sm = 0., tb = 0.5 * mNTPCOccBinLength; - /* // at the moment not used - if (mOccWghFun) { - mTBinClOccWgh.resize(nTPCBins); - float occBin2MUS = 8 * o2::constants::lhc::LHCBunchSpacingMUS; - int covWghTB = TMath::NInt(100./occBin2MUS); // coverage of weighted occ. in TBins - for (int i = 0; i < nTPCBins; i++) { - sm = 0.; - for (int j=-covWghTB;j=nTPCBins) { - continue; - } - sm += mOccWghFun->Eval(j*occBin2MUS)*mTPCRefitter->getParam()->GetUnscaledMult(j+i); - } - mTBinClOccWgh[i] = sm; - } - } else { - mTBinClOccWgh.resize(1); - } - */ - std::vector mltHistTB(nTPCOccBins); + mMltHistTB.resize(nTPCOccBins); for (int i = 0; i < nTPCOccBins; i++) { - mltHistTB[i] = mTPCRefitter->getParam()->GetUnscaledMult(tb); + mMltHistTB[i] = mTPCRefitter->getParam()->GetUnscaledMult(tb); tb += mNTPCOccBinLength; } for (int i = nTPCOccBins; i--;) { - sm += mltHistTB[i]; + sm += mMltHistTB[i]; if (i + sumBins < nTPCOccBins) { - sm -= mltHistTB[i + sumBins]; + sm -= mMltHistTB[i + sumBins]; } mTBinClOccAft[i] = sm; } sm = 0; for (int i = 0; i < nTPCOccBins; i++) { - sm += mltHistTB[i]; + sm += mMltHistTB[i]; if (i - sumBins > 0) { - sm -= mltHistTB[i - sumBins]; + sm -= mMltHistTB[i - sumBins]; } mTBinClOccBef[i] = sm; } @@ -271,13 +257,17 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) 
o2::dataformats::PrimaryVertexExt pveDummy; o2::dataformats::PrimaryVertexExt vtxDummy(mMeanVtx.getPos(), {}, {}, 0); std::vector pveVec(nv); + std::vector tpcOccAftV, tpcOccBefV; pveVec.back() = vtxDummy; const auto& alpParams = o2::itsmft::DPLAlpideParam::Instance(); float tBiasITS = alpParams.roFrameBiasInBC * o2::constants::lhc::LHCBunchSpacingMUS; const o2::ft0::InteractionTag& ft0Params = o2::ft0::InteractionTag::Instance(); std::vector trcExtVec; std::vector trcPairsVec; - auto vdrit = mTPCVDriftHelper.getVDriftObject().getVDrift(); + auto vdrift = mTPCVDriftHelper.getVDriftObject().getVDrift(); + float maxDriftTB = 250.f / vdrift / (o2::constants::lhc::LHCBunchSpacingMUS * 8); + int groupOcc = std::ceil(maxDriftTB / mNOccBinsDrift / mNTPCOccBinLength); + bool tpcTrackOK = recoData.isTrackSourceLoaded(GTrackID::TPC); auto fillTPCClInfo = [&recoData, this](const o2::tpc::TrackTPC& trc, o2::dataformats::TrackInfoExt& trExt, float timestampTB = -1e9) { @@ -391,6 +381,8 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } } }; + tpcOccAftV.resize(mNOccBinsDrift); + tpcOccBefV.resize(mNOccBinsDrift); for (int iv = 0; iv < nv; iv++) { LOGP(debug, "processing PV {} of {}", iv, nv); @@ -455,7 +447,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) continue; } if (iv < nv - 1 && is == GTrackID::TPC && tpcTr && !tpcTr->hasBothSidesClusters()) { // for unconstrained TPC tracks correct track Z - float corz = vdrit * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); + float corz = vdrift * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); if (tpcTr->hasASideClustersOnly()) { corz = -corz; // A-side } @@ -500,7 +492,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } else { o2::track::TrackParCov tmpTPC(*tpcTr); if (iv < nv - 1 && is == GTrackID::TPC && tpcTr && !tpcTr->hasBothSidesClusters()) { // for unconstrained TPC tracks 
correct track Z - float corz = vdrit * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); + float corz = vdrift * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); if (tpcTr->hasASideClustersOnly()) { corz = -corz; // A-side } @@ -554,10 +546,35 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) int tb = pveVec[iv].getTimeStamp().getTimeStamp() * mTPCTBinMUSInv * mNTPCOccBinLengthInv; tpcOccBef = tb < 0 ? mTBinClOccBef[0] : (tb >= mTBinClOccBef.size() ? mTBinClOccBef.back() : mTBinClOccBef[tb]); tpcOccAft = tb < 0 ? mTBinClOccAft[0] : (tb >= mTBinClOccAft.size() ? mTBinClOccAft.back() : mTBinClOccAft[tb]); + int tbc = pveVec[iv].getTimeStamp().getTimeStamp() * mTPCTBinMUSInv * mNTPCOccBinLengthInv - groupOcc / 2.; + for (int iob = 0; iob < mNOccBinsDrift; iob++) { + float sm = 0; + for (int ig = 0; ig < groupOcc; ig++) { + int ocb = tbc + ig + groupOcc * iob; + if (ocb < 0 || ocb >= (int)mMltHistTB.size()) { + sm = -1; + break; + } + sm += mMltHistTB[ocb]; + } + tpcOccAftV[iob] = sm; + // + sm = 0; + for (int ig = 0; ig < groupOcc; ig++) { + int ocb = tbc + ig - groupOcc * iob; + if (ocb < 0 || ocb >= (int)mMltHistTB.size()) { + sm = -1; + break; + } + sm += mMltHistTB[ocb]; + } + tpcOccBefV[iob] = sm; + } } (*mDBGOut) << "trpv" << "orbit=" << recoData.startIR.orbit << "tfID=" << TFCount << "tpcOccBef=" << tpcOccBef << "tpcOccAft=" << tpcOccAft + << "tpcOccBefV=" << tpcOccBefV << "tpcOccAftV=" << tpcOccAftV << "pve=" << pveVec[iv] << "trc=" << trcExtVec << "\n"; if (mDoPairsCorr) { @@ -752,6 +769,7 @@ DataProcessorSpec getTrackingStudySpec(GTrackID::mask_t srcTracks, GTrackID::mas {"with-its-only", VariantType::Bool, false, {"Store tracks with ITS only"}}, {"pair-correlations", VariantType::Bool, false, {"Do pairs correlation"}}, {"occ-weight-fun", VariantType::String, "(x>=-40&&x<-5) ? (1./1225*pow(x+40,2)) : ((x>-5&&x<15) ? 1. : ((x>=15&&x<40) ? (-0.4/25*x+1.24 ) : ( (x>40&&x<100) ? 
-0.4/60*x+0.6+0.8/3 : 0)))", {"Occupancy weighting f-n vs time in musec"}}, + {"noccbins", VariantType::Int, 10, {"Number of occupancy bins per full drift time"}}, {"min-x-prop", VariantType::Float, 100.f, {"track should be propagated to this X at least"}}, }; o2::tpc::VDriftHelper::requestCCDBInputs(dataRequest->inputs); From 1bcf367115a918253404a92a9537311646c895e1 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sun, 11 May 2025 18:47:32 +0200 Subject: [PATCH 0336/1764] DPL: fix rate limiting handling (#14255) On success, FairMQ returns a positive number of bytes, not 0. --- Framework/Core/src/CommonDataProcessors.cxx | 26 ++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/CommonDataProcessors.cxx b/Framework/Core/src/CommonDataProcessors.cxx index 737e1b7e635c8..c2431b3ab068d 100644 --- a/Framework/Core/src/CommonDataProcessors.cxx +++ b/Framework/Core/src/CommonDataProcessors.cxx @@ -30,9 +30,11 @@ #include "Framework/RuntimeError.h" #include "Framework/RateLimiter.h" #include "Framework/PluginManager.h" +#include "Framework/Signpost.h" #include #include +#include #include #include #include @@ -40,6 +42,9 @@ using namespace o2::framework::data_matcher; +// Special log to track callbacks we know about +O2_DECLARE_DYNAMIC_LOG(callbacks); + namespace o2::framework { @@ -145,6 +150,10 @@ DataProcessorSpec CommonDataProcessors::getGlobalFairMQSink(std::vectordata; auto& timesliceIndex = services->get(); @@ -152,20 +161,35 @@ void retryMetricCallback(uv_async_t* async) auto channel = device->GetChannels().find("metric-feedback"); auto oldestPossingTimeslice = timesliceIndex.getOldestPossibleOutput().timeslice.value; if (channel == device->GetChannels().end()) { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", "Could not find metric-feedback channel."); return; } fair::mq::MessagePtr payload(device->NewMessage()); payload->Rebuild(&oldestPossingTimeslice, 
sizeof(int64_t), nullptr, nullptr); auto consumed = oldestPossingTimeslice; + size_t start = uv_hrtime(); int64_t result = channel->second[0].Send(payload, 100); + size_t stop = uv_hrtime(); // If the sending worked, we do not retry. - if (result != 0) { + if (result <= 0) { + // Forcefully slow down in case FairMQ returns earlier than expected... + int64_t ellapsed = (stop - start) / 1000000; + if (ellapsed < 100) { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", + "FairMQ returned %llu earlier than expected. Sleeping %llu ms more before, retrying.", + result, ellapsed); + uv_sleep(100 - ellapsed); + } else { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", + "FairMQ returned %llu, unable to send last consumed timeslice to source for %llu ms, retrying.", result, ellapsed); + } // If the sending did not work, we keep trying until it actually works. // This will schedule other tasks in the queue, so the processing of the // data will still happen. uv_async_send(async); } else { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", "Send %llu bytes, Last timeslice now set to %zu.", result, consumed); lastTimeslice = consumed; } } From f926be7e0b3e05ddce8e040f264b3eadf25a5a84 Mon Sep 17 00:00:00 2001 From: shahoian Date: Sun, 11 May 2025 15:37:02 +0200 Subject: [PATCH 0337/1764] Fix typo in the RecoContainer::getTrackTimeTPCTRD Thanks for Felix for spotting --- DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index 39cc05d8a69e7..d4b4e2b89cbb0 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -1594,7 +1594,7 @@ void RecoContainer::getTrackTimeITSTPCTRD(GTrackID gid, float& t, float& tErr) c //________________________________________________________ void 
RecoContainer::getTrackTimeTPCTRD(GTrackID gid, float& t, float& tErr) const { - const auto trigTPCTRD = getITSTPCTRDTriggers(); + const auto trigTPCTRD = getTPCTRDTriggers(); // very slow: find the trigger this track belongs to for (const auto& trig : trigTPCTRD) { if (trig.getTrackRefs().getEntriesBound() > gid.getIndex()) { From 04ba3bd18306dd47a245d32f6556eec86d546a4b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 10:46:51 +0200 Subject: [PATCH 0338/1764] GPU: Fix performance regression: DETERMINISTIC CODE was used unintentionally in 2 places --- GPU/Common/GPUCommonAlgorithm.h | 37 +++++++++---------- .../Base/cuda/GPUReconstructionCUDArtc.cu | 5 ++- .../GPUTPCCompressionKernels.cxx | 24 ++++++------ 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index d0643391246a8..8cd53ec5e0609 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -282,30 +282,27 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp) { #ifndef GPUCA_GPUCODE GPUCommonAlgorithm::sort(begin, end, comp); +#elif defined(GPUCA_DETERMINISTIC_MODE) // Not using GPUCA_DETERMINISTIC_CODE, which is enforced in TPC compression + if (get_local_id(0) == 0) { + GPUCommonAlgorithm::sort(begin, end, comp); + } + GPUbarrier(); #else - GPUCA_DETERMINISTIC_CODE( // clang-format off - GPUbarrier(); - if (get_local_id(0) == 0) { - GPUCommonAlgorithm::sort(begin, end, comp); - } - GPUbarrier(); - , // !GPUCA_DETERMINISTIC_CODE - int32_t n = end - begin; - for (int32_t i = 0; i < n; i++) { - for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { - int32_t offset = i % 2; - int32_t curPos = 2 * tIdx + offset; - int32_t nextPos = curPos + 1; - - if (nextPos < n) { - if (!comp(begin[curPos], begin[nextPos])) { - IterSwap(&begin[curPos], &begin[nextPos]); - } + int32_t n = end - begin; + for (int32_t i = 0; i < n; i++) { + for (int32_t tIdx 
= get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { + int32_t offset = i % 2; + int32_t curPos = 2 * tIdx + offset; + int32_t nextPos = curPos + 1; + + if (nextPos < n) { + if (!comp(begin[curPos], begin[nextPos])) { + IterSwap(&begin[curPos], &begin[nextPos]); } } - GPUbarrier(); } - ) // clang-format on + GPUbarrier(); + } #endif } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 50a568ab345cf..805397c9b430e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -14,9 +14,12 @@ #define GPUCA_GPUCODE_GENRTC #define GPUCA_GPUCODE_COMPILEKERNELS + +// Keep some preprocessor calls unprocessed #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) -// GPUReconstructionCUDAIncludesSystem.h prependended without preprocessor running + +// GPUReconstructionCUDAIncludesSystem.h prependended by CMakewithout preprocessor running #include "GPUReconstructionCUDADef.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 3b88c8764d0fd..bba97e9eace9b 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -273,19 +273,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->clusters[iSector][iRow])); +#else // GPUCA_DETERMINISTIC_MODE + if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - , // !GPUCA_DETERMINISTIC_CODE - if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, 
GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } - ) // clang-format on + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } +#endif // GPUCA_DETERMINISTIC_MODE GPUbarrier(); } From ea6e536b48052186c6bd263cd9df1e83e3f16ce7 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 12 May 2025 20:49:42 +0200 Subject: [PATCH 0339/1764] DPL: do not create unneeded statics (#14261) --- .../Core/include/Framework/TableBuilder.h | 28 ++++++++----------- Framework/Core/src/TableBuilder.cxx | 4 +-- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 32fe78b852eff..936a8a04d5a5a 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ 
b/Framework/Core/include/Framework/TableBuilder.h @@ -855,7 +855,7 @@ auto makeEmptyTable(const char* name, framework::pack p) } std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, - expressions::Projector* projectors, std::vector> const& fields, const char* name, std::shared_ptr& projector); + expressions::Projector* projectors, const char* name, std::shared_ptr& projector); /// Expression-based column generator to materialize columns template @@ -867,10 +867,9 @@ auto spawner(std::vector>&& tables, const char* na if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(placeholders_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template @@ -881,10 +880,9 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, o if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(placeholders_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template @@ -896,15 +894,15 @@ auto spawner(std::vector>&& tables, const char* na if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - 
static auto fields = o2::soa::createFieldsFromColumns(expression_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); + auto projectors = [](framework::pack) -> std::array { return {{std::move(C::Projector())...}}; } (expression_pack_t{}); - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); } template @@ -915,15 +913,14 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, s if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(expression_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); auto projectors = [](framework::pack) -> std::array { return {{std::move(C::Projector())...}}; } (expression_pack_t{}); - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); } template @@ -933,10 +930,9 @@ auto spawner(framework::pack columns, std::vectornum_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } - static auto fields = o2::soa::createFieldsFromColumns(columns); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(columns)); std::array projectors{{std::move(C::Projector())...}}; - return spawnerHelper(fullTable, new_schema, sizeof...(C), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, sizeof...(C), 
projectors.data(), name, projector); } template diff --git a/Framework/Core/src/TableBuilder.cxx b/Framework/Core/src/TableBuilder.cxx index d9827559c2148..eb19f8d3fe642 100644 --- a/Framework/Core/src/TableBuilder.cxx +++ b/Framework/Core/src/TableBuilder.cxx @@ -85,11 +85,11 @@ void TableBuilder::setLabel(const char* label) } std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, - expressions::Projector* projectors, std::vector> const& fields, const char* name, + expressions::Projector* projectors, const char* name, std::shared_ptr& projector) { if (projector == nullptr) { - projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), fields); + projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), newSchema->fields()); } arrow::TableBatchReader reader(*fullTable); From 240812f69d5d2cb39cfc5b30ce9ef63a3254f394 Mon Sep 17 00:00:00 2001 From: shahoian Date: Mon, 12 May 2025 15:04:30 +0200 Subject: [PATCH 0340/1764] add TPC chi2 to trackstudy output --- .../study/include/GlobalTrackingStudy/TrackInfoExt.h | 3 ++- Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h index 935e57873bbd9..26eeea858d14b 100644 --- a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h @@ -35,6 +35,7 @@ struct TrackInfoExt { float ttime = 0; float ttimeE = 0; float xmin = 0; + float chi2TPC = 0.f; float chi2ITSTPC = 0.f; float q2ptITS = 0.f; float q2ptTPC = 0.f; @@ -56,7 +57,7 @@ struct TrackInfoExt { float getTPCInY0() const { return innerTPCPos0[1]; } float getTPCInZ0() const { return 
innerTPCPos0[2]; } - ClassDefNV(TrackInfoExt, 5); + ClassDefNV(TrackInfoExt, 6); }; } // namespace dataformats diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index a74349bdeba15..a2bf6abd35fef 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -503,6 +503,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } } fillTPCClInfo(*tpcTr, trcExt, tsuse); + trcExt.chi2TPC = tpcTr->getChi2(); } auto gidRefs = recoData.getSingleDetectorRefs(vid); if (gidRefs[GTrackID::ITS].isIndexSet()) { From 751bb12f5b93dd6612ad15f470eec418db2800bb Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 08:42:44 +0200 Subject: [PATCH 0341/1764] Move JSON parsing out of line (#14264) --- Framework/Core/CMakeLists.txt | 1 + .../include/Framework/VariantJSONHelpers.h | 430 +--------------- Framework/Core/src/VariantJSONHelpers.cxx | 464 ++++++++++++++++++ 3 files changed, 468 insertions(+), 427 deletions(-) create mode 100644 Framework/Core/src/VariantJSONHelpers.cxx diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 2691d9d33a0c6..17320348d9272 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -140,6 +140,7 @@ o2_add_library(Framework src/Task.cxx src/Array2D.cxx src/Variant.cxx + src/VariantJSONHelpers.cxx src/VariantPropertyTreeHelpers.cxx src/WorkflowCustomizationHelpers.cxx src/WorkflowHelpers.cxx diff --git a/Framework/Core/include/Framework/VariantJSONHelpers.h b/Framework/Core/include/Framework/VariantJSONHelpers.h index eab78d547ca13..811e6f13d4985 100644 --- a/Framework/Core/include/Framework/VariantJSONHelpers.h +++ b/Framework/Core/include/Framework/VariantJSONHelpers.h @@ -19,439 +19,15 @@ #include #include -#include -#include -#include +#include namespace 
o2::framework { -namespace -{ -template -struct VariantReader : public rapidjson::BaseReaderHandler, VariantReader> { - using Ch = rapidjson::UTF8<>::Ch; - using SizeType = rapidjson::SizeType; - - enum struct State { - IN_START, - IN_STOP, - IN_DATA, - IN_KEY, - IN_ARRAY, - IN_ROW, - IN_ERROR - }; - - VariantReader() - : states{}, - rows{0}, - cols{0} - { - debug << "Start" << std::endl; - states.push(State::IN_START); - } - - bool Null() - { - debug << "Null value encountered" << std::endl; - return true; - } - - bool Int(int i) - { - debug << "Int(" << i << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!std::is_same_v>) { - states.push(State::IN_ERROR); - return true; - } else { - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - debug << "added to array" << std::endl; - accumulatedData.push_back(i); - return true; - } - } - states.push(State::IN_ERROR); - return true; - } - - bool Uint(unsigned i) - { - debug << "Uint -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Int64(int64_t i) - { - debug << "Int64 -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Uint64(uint64_t i) - { - debug << "Uint64 -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Double(double d) - { - debug << "Double(" << d << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!(std::is_same_v> || std::is_same_v>)) { - states.push(State::IN_ERROR); - return true; - } - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - if constexpr (std::is_same_v>) { - debug << "added to array as double" << std::endl; - accumulatedData.push_back(d); - return true; - } else if constexpr (std::is_same_v>) { - debug << "added to array as float" << std::endl; - accumulatedData.push_back(static_cast(d)); - return true; - } - } - 
states.push(State::IN_ERROR); - return true; - } - - bool Bool(bool b) - { - debug << "Bool(" << b << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!std::is_same_v>) { - states.push(State::IN_ERROR); - return false; - } else { - if (states.top() == State::IN_ARRAY) { - debug << "added to array" << std::endl; - accumulatedData.push_back(b); - return true; - } - states.push(State::IN_ERROR); - return true; - } - } - - bool String(const Ch* str, SizeType, bool) - { - debug << "String(" << str << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!(V == VariantType::ArrayString || isLabeledArray())) { - states.push(State::IN_ERROR); - return true; - } else { - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - debug << "added to array" << std::endl; - if constexpr (isLabeledArray()) { - if (currentKey == labels_rows_str) { - labels_rows.push_back(str); - return true; - } - if (currentKey == labels_cols_str) { - labels_cols.push_back(str); - return true; - } - } - if (currentKey == "values") { - if constexpr (std::is_same_v>) { - accumulatedData.push_back(str); - } else { - states.push(State::IN_ERROR); - } - return true; - } - return true; - } - states.push(State::IN_ERROR); - return true; - } - } - - bool StartObject() - { - debug << "StartObject()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_START) { - states.push(State::IN_DATA); - return true; - } - states.push(State::IN_ERROR); - return true; - } - - bool Key(const Ch* str, SizeType, bool) - { - debug << "Key(" << str << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - currentKey = str; - return false; - } - if (states.top() == State::IN_DATA) { - // no previous 
keys - states.push(State::IN_KEY); - currentKey = str; - return true; - } - if (states.top() == State::IN_KEY) { - currentKey = str; - if constexpr (!isLabeledArray()) { - debug << "extra keys in a single-key variant" << std::endl; - states.push(State::IN_ERROR); - return true; - } - return true; - } - currentKey = str; - states.push(State::IN_ERROR); - return true; - } - - bool EndObject(SizeType) - { - debug << "EndObject()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_KEY) { - if constexpr (isArray()) { - debug << "creating 1d-array variant" << std::endl; - result = Variant(accumulatedData); - } else if constexpr (isArray2D()) { - debug << "creating 2d-array variant" << std::endl; - assert(accumulatedData.size() == rows * cols); - result = Variant(Array2D{accumulatedData, rows, cols}); - } else if constexpr (isLabeledArray()) { - debug << "creating labeled array variant" << std::endl; - assert(accumulatedData.size() == rows * cols); - if (labels_rows.empty() == false) { - assert(labels_rows.size() == rows); - } - if (labels_cols.empty() == false) { - assert(labels_cols.size() == cols); - } - result = Variant(LabeledArray{Array2D{accumulatedData, rows, cols}, labels_rows, labels_cols}); - } - states.push(State::IN_STOP); - return true; - } - states.push(State::IN_ERROR); - return true; - } - - bool StartArray() - { - debug << "StartArray()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_KEY) { - states.push(State::IN_ARRAY); - return true; - } else if (states.top() == State::IN_ARRAY) { - if constexpr (isArray2D() || isLabeledArray()) { - states.push(State::IN_ROW); - return true; - } - } - states.push(State::IN_ERROR); - return true; - } - - bool EndArray(SizeType elementCount) - { - debug << "EndArray()" << std::endl; - if (states.top() == State::IN_ERROR) { 
- debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_ARRAY) { - // finish up array - states.pop(); - if constexpr (isArray2D() || isLabeledArray()) { - rows = elementCount; - } - return true; - } else if (states.top() == State::IN_ROW) { - // finish up row - states.pop(); - if constexpr (isArray2D() || isLabeledArray()) { - cols = elementCount; - } - return true; - } - states.push(State::IN_ERROR); - return true; - } - - std::stack states; - std::ostringstream debug; - - uint32_t rows; - uint32_t cols; - std::string currentKey; - std::vector> accumulatedData; - std::vector labels_rows; - std::vector labels_cols; - Variant result; -}; - -template -void writeVariant(std::ostream& o, Variant const& v) -{ - if constexpr (isArray() || isArray2D() || isLabeledArray()) { - using type = variant_array_element_type_t; - rapidjson::OStreamWrapper osw(o); - rapidjson::Writer w(osw); - - auto writeArray = [&](auto* values, size_t size) { - using T = std::remove_pointer_t; - w.StartArray(); - for (auto i = 0u; i < size; ++i) { - if constexpr (std::is_same_v) { - w.Int(values[i]); - } else if constexpr (std::is_same_v || std::is_same_v) { - w.Double(values[i]); - } else if constexpr (std::is_same_v) { - w.Bool(values[i]); - } else if constexpr (std::is_same_v) { - w.String(values[i].c_str()); - } - } - w.EndArray(); - }; - - auto writeVector = [&](auto&& vector) { - return writeArray(vector.data(), vector.size()); - }; - - auto writeArray2D = [&](auto&& array2d) { - using T = typename std::decay_t::element_t; - w.StartArray(); - for (auto i = 0u; i < array2d.rows; ++i) { - w.StartArray(); - for (auto j = 0u; j < array2d.cols; ++j) { - if constexpr (std::is_same_v) { - w.Int(array2d(i, j)); - } else if constexpr (std::is_same_v || std::is_same_v) { - w.Double(array2d(i, j)); - } else if constexpr (std::is_same_v) { - w.String(array2d(i, j).c_str()); - } - } - w.EndArray(); - } - w.EndArray(); - }; - - auto writeLabeledArray = [&](auto&& 
array) { - w.Key(labels_rows_str); - writeVector(array.getLabelsRows()); - w.Key(labels_cols_str); - writeVector(array.getLabelsCols()); - w.Key("values"); - writeArray2D(array.getData()); - }; - - w.StartObject(); - if constexpr (isArray()) { - w.Key("values"); - writeArray(v.get(), v.size()); - } else if constexpr (isArray2D()) { - w.Key("values"); - writeArray2D(v.get>()); - } else if constexpr (isLabeledArray()) { - writeLabeledArray(v.get>()); - } else if constexpr (V == VariantType::Dict) { - // nothing to do for dicts - } - w.EndObject(); - } -} -} // namespace - struct VariantJSONHelpers { template - static Variant read(std::istream& s) - { - rapidjson::Reader reader; - rapidjson::IStreamWrapper isw(s); - VariantReader vreader; - bool ok = reader.Parse(isw, vreader); - - if (ok == false) { - std::stringstream error; - error << "Cannot parse serialized Variant, error: " << rapidjson::GetParseError_En(reader.GetParseErrorCode()) << " at offset: " << reader.GetErrorOffset(); - throw std::runtime_error(error.str()); - } - return vreader.result; - } + static Variant read(std::istream& s); - static void write(std::ostream& o, Variant const& v) - { - switch (v.type()) { - case VariantType::ArrayInt: - writeVariant(o, v); - break; - case VariantType::ArrayFloat: - writeVariant(o, v); - break; - case VariantType::ArrayDouble: - writeVariant(o, v); - break; - case VariantType::ArrayBool: - throw std::runtime_error("Bool vectors not implemented yet"); - // writeVariant(o, v); - break; - case VariantType::ArrayString: - writeVariant(o, v); - break; - case VariantType::Array2DInt: - writeVariant(o, v); - break; - case VariantType::Array2DFloat: - writeVariant(o, v); - break; - case VariantType::Array2DDouble: - writeVariant(o, v); - break; - case VariantType::LabeledArrayInt: - writeVariant(o, v); - break; - case VariantType::LabeledArrayFloat: - writeVariant(o, v); - break; - case VariantType::LabeledArrayDouble: - writeVariant(o, v); - break; - case 
VariantType::LabeledArrayString: - writeVariant(o, v); - break; - case VariantType::Dict: - writeVariant(o, v); - default: - break; - } - } + static void write(std::ostream& o, Variant const& v); }; } // namespace o2::framework diff --git a/Framework/Core/src/VariantJSONHelpers.cxx b/Framework/Core/src/VariantJSONHelpers.cxx new file mode 100644 index 0000000000000..fbb5abb331867 --- /dev/null +++ b/Framework/Core/src/VariantJSONHelpers.cxx @@ -0,0 +1,464 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+#include "Framework/VariantJSONHelpers.h" +#include "Framework/Variant.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace o2::framework +{ +namespace +{ +template +struct VariantReader : public rapidjson::BaseReaderHandler, VariantReader> { + using Ch = rapidjson::UTF8<>::Ch; + using SizeType = rapidjson::SizeType; + + enum struct State { + IN_START, + IN_STOP, + IN_DATA, + IN_KEY, + IN_ARRAY, + IN_ROW, + IN_ERROR + }; + + VariantReader() + : states{}, + rows{0}, + cols{0} + { + debug << "Start" << std::endl; + states.push(State::IN_START); + } + + bool Null() + { + debug << "Null value encountered" << std::endl; + return true; + } + + bool Int(int i) + { + debug << "Int(" << i << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!std::is_same_v>) { + states.push(State::IN_ERROR); + return true; + } else { + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + debug << "added to array" << std::endl; + accumulatedData.push_back(i); + return true; + } + } + states.push(State::IN_ERROR); + return true; + } + + bool Uint(unsigned i) + { + debug << "Uint -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Int64(int64_t i) + { + debug << "Int64 -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Uint64(uint64_t i) + { + debug << "Uint64 -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Double(double d) + { + debug << "Double(" << d << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!(std::is_same_v> || std::is_same_v>)) { + states.push(State::IN_ERROR); + return true; + } + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + if constexpr (std::is_same_v>) { + debug << "added to array as double" << std::endl; + accumulatedData.push_back(d); + return true; + } else if 
constexpr (std::is_same_v>) { + debug << "added to array as float" << std::endl; + accumulatedData.push_back(static_cast(d)); + return true; + } + } + states.push(State::IN_ERROR); + return true; + } + + bool Bool(bool b) + { + debug << "Bool(" << b << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!std::is_same_v>) { + states.push(State::IN_ERROR); + return false; + } else { + if (states.top() == State::IN_ARRAY) { + debug << "added to array" << std::endl; + accumulatedData.push_back(b); + return true; + } + states.push(State::IN_ERROR); + return true; + } + } + + bool String(const Ch* str, SizeType, bool) + { + debug << "String(" << str << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!(V == VariantType::ArrayString || isLabeledArray())) { + states.push(State::IN_ERROR); + return true; + } else { + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + debug << "added to array" << std::endl; + if constexpr (isLabeledArray()) { + if (currentKey == labels_rows_str) { + labels_rows.push_back(str); + return true; + } + if (currentKey == labels_cols_str) { + labels_cols.push_back(str); + return true; + } + } + if (currentKey == "values") { + if constexpr (std::is_same_v>) { + accumulatedData.push_back(str); + } else { + states.push(State::IN_ERROR); + } + return true; + } + return true; + } + states.push(State::IN_ERROR); + return true; + } + } + + bool StartObject() + { + debug << "StartObject()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_START) { + states.push(State::IN_DATA); + return true; + } + states.push(State::IN_ERROR); + return true; + } + + bool Key(const Ch* str, SizeType, bool) + { + debug << "Key(" << str << ")" << std::endl; + if (states.top() == 
State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + currentKey = str; + return false; + } + if (states.top() == State::IN_DATA) { + // no previous keys + states.push(State::IN_KEY); + currentKey = str; + return true; + } + if (states.top() == State::IN_KEY) { + currentKey = str; + if constexpr (!isLabeledArray()) { + debug << "extra keys in a single-key variant" << std::endl; + states.push(State::IN_ERROR); + return true; + } + return true; + } + currentKey = str; + states.push(State::IN_ERROR); + return true; + } + + bool EndObject(SizeType) + { + debug << "EndObject()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_KEY) { + if constexpr (isArray()) { + debug << "creating 1d-array variant" << std::endl; + result = Variant(accumulatedData); + } else if constexpr (isArray2D()) { + debug << "creating 2d-array variant" << std::endl; + assert(accumulatedData.size() == rows * cols); + result = Variant(Array2D{accumulatedData, rows, cols}); + } else if constexpr (isLabeledArray()) { + debug << "creating labeled array variant" << std::endl; + assert(accumulatedData.size() == rows * cols); + if (labels_rows.empty() == false) { + assert(labels_rows.size() == rows); + } + if (labels_cols.empty() == false) { + assert(labels_cols.size() == cols); + } + result = Variant(LabeledArray{Array2D{accumulatedData, rows, cols}, labels_rows, labels_cols}); + } + states.push(State::IN_STOP); + return true; + } + states.push(State::IN_ERROR); + return true; + } + + bool StartArray() + { + debug << "StartArray()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_KEY) { + states.push(State::IN_ARRAY); + return true; + } else if (states.top() == State::IN_ARRAY) { + if constexpr (isArray2D() || isLabeledArray()) { + states.push(State::IN_ROW); + return true; + } + } + 
states.push(State::IN_ERROR); + return true; + } + + bool EndArray(SizeType elementCount) + { + debug << "EndArray()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_ARRAY) { + // finish up array + states.pop(); + if constexpr (isArray2D() || isLabeledArray()) { + rows = elementCount; + } + return true; + } else if (states.top() == State::IN_ROW) { + // finish up row + states.pop(); + if constexpr (isArray2D() || isLabeledArray()) { + cols = elementCount; + } + return true; + } + states.push(State::IN_ERROR); + return true; + } + + std::stack states; + std::ostringstream debug; + + uint32_t rows; + uint32_t cols; + std::string currentKey; + std::vector> accumulatedData; + std::vector labels_rows; + std::vector labels_cols; + Variant result; +}; +} // namespace + +template +Variant VariantJSONHelpers::read(std::istream& s) +{ + rapidjson::Reader reader; + rapidjson::IStreamWrapper isw(s); + VariantReader vreader; + bool ok = reader.Parse(isw, vreader); + + if (ok == false) { + std::stringstream error; + error << "Cannot parse serialized Variant, error: " << rapidjson::GetParseError_En(reader.GetParseErrorCode()) << " at offset: " << reader.GetErrorOffset(); + throw std::runtime_error(error.str()); + } + return vreader.result; +} + +template +void writeVariant(std::ostream& o, Variant const& v) +{ + if constexpr (isArray() || isArray2D() || isLabeledArray()) { + using type = variant_array_element_type_t; + rapidjson::OStreamWrapper osw(o); + rapidjson::Writer w(osw); + + auto writeArray = [&](auto* values, size_t size) { + using T = std::remove_pointer_t; + w.StartArray(); + for (auto i = 0u; i < size; ++i) { + if constexpr (std::is_same_v) { + w.Int(values[i]); + } else if constexpr (std::is_same_v || std::is_same_v) { + w.Double(values[i]); + } else if constexpr (std::is_same_v) { + w.Bool(values[i]); + } else if constexpr (std::is_same_v) { + 
w.String(values[i].c_str()); + } + } + w.EndArray(); + }; + + auto writeVector = [&](auto&& vector) { + return writeArray(vector.data(), vector.size()); + }; + + auto writeArray2D = [&](auto&& array2d) { + using T = typename std::decay_t::element_t; + w.StartArray(); + for (auto i = 0u; i < array2d.rows; ++i) { + w.StartArray(); + for (auto j = 0u; j < array2d.cols; ++j) { + if constexpr (std::is_same_v) { + w.Int(array2d(i, j)); + } else if constexpr (std::is_same_v || std::is_same_v) { + w.Double(array2d(i, j)); + } else if constexpr (std::is_same_v) { + w.String(array2d(i, j).c_str()); + } + } + w.EndArray(); + } + w.EndArray(); + }; + + auto writeLabeledArray = [&](auto&& array) { + w.Key(labels_rows_str); + writeVector(array.getLabelsRows()); + w.Key(labels_cols_str); + writeVector(array.getLabelsCols()); + w.Key("values"); + writeArray2D(array.getData()); + }; + + w.StartObject(); + if constexpr (isArray()) { + w.Key("values"); + writeArray(v.get(), v.size()); + } else if constexpr (isArray2D()) { + w.Key("values"); + writeArray2D(v.get>()); + } else if constexpr (isLabeledArray()) { + writeLabeledArray(v.get>()); + } else if constexpr (V == VariantType::Dict) { + // nothing to do for dicts + } + w.EndObject(); + } +} + +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); + +void VariantJSONHelpers::write(std::ostream& o, Variant const& v) +{ + switch 
(v.type()) { + case VariantType::ArrayInt: + writeVariant(o, v); + break; + case VariantType::ArrayFloat: + writeVariant(o, v); + break; + case VariantType::ArrayDouble: + writeVariant(o, v); + break; + case VariantType::ArrayBool: + throw std::runtime_error("Bool vectors not implemented yet"); + // writeVariant(o, v); + break; + case VariantType::ArrayString: + writeVariant(o, v); + break; + case VariantType::Array2DInt: + writeVariant(o, v); + break; + case VariantType::Array2DFloat: + writeVariant(o, v); + break; + case VariantType::Array2DDouble: + writeVariant(o, v); + break; + case VariantType::LabeledArrayInt: + writeVariant(o, v); + break; + case VariantType::LabeledArrayFloat: + writeVariant(o, v); + break; + case VariantType::LabeledArrayDouble: + writeVariant(o, v); + break; + case VariantType::LabeledArrayString: + writeVariant(o, v); + break; + case VariantType::Dict: + writeVariant(o, v); + default: + break; + } +} +} // namespace o2::framework From 8de719349f3dc57e670b7ebf6b8206dee1e426b7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 11:43:12 +0200 Subject: [PATCH 0342/1764] GPU: Add GPUCA_RTC_CONSTEXPR macro for constexpr only in RTC --- GPU/Common/GPUCommonDef.h | 4 ++++ GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 1 + GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu | 1 + 3 files changed, 6 insertions(+) diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index d7e99f53d4ce8..d9a5bdf92b6ac 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -72,6 +72,10 @@ #define GPUCA_RTC_SPECIAL_CODE(...) 
#endif +#ifndef GPUCA_RTC_CONSTEXPR + #define GPUCA_RTC_CONSTEXPR +#endif + #ifndef GPUCA_DETERMINISTIC_CODE #ifdef GPUCA_DETERMINISTIC_MODE #define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 5706f32e73e96..acc77648d954b 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,6 +38,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + + std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 805397c9b430e..66c02d6ed251c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -18,6 +18,7 @@ // Keep some preprocessor calls unprocessed #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) 
GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) +#define GPUCA_RTC_CONSTEXPR GPUCA_RTC_CONSTEXPR // GPUReconstructionCUDAIncludesSystem.h prependended by CMakewithout preprocessor running #include "GPUReconstructionCUDADef.h" From 1dc506884cf82e11378a33ad1621319e11a17402 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 11:43:40 +0200 Subject: [PATCH 0343/1764] GPU TPC: Compute alternative dEdx only if it has different settings than normal dEdx --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 17 +++-- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 70 +++++++++++-------- 3 files changed, 55 insertions(+), 36 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 6e7de7ee48ca6..d2aba503be6a6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -301,7 +301,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); if (mRec->GetParam().dodEdxEnabled) { computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); - computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + } } computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); if (mRec->GetParam().par.earlyTpcTransform) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9dc6ddc59c2b4..9ead17ea5c7c0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -106,6 +106,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); TrackTPC* outputTracks = merger.OutputTracksTPCO2(); uint32_t* 
clusRefs = merger.OutputClusRefsTPCO2(); + const auto& param = merger.Param(); GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); uint2* GPUrestrict() tmpData = merger.ClusRefTmp(); @@ -130,9 +131,15 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled) { - oTrack.setdEdx(tracksdEdx[i]); - oTrack.setdEdxAlt(tracksdEdxAlt[i]); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if (param.dodEdxEnabled) { + oTrack.setdEdx(tracksdEdx[i]); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + oTrack.setdEdxAlt(tracksdEdxAlt[i]); + } else { + oTrack.setdEdxAlt(tracksdEdx[i]); + } + } } auto snpOut = outerPar.P[2]; @@ -148,9 +155,9 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks outerPar.C[6], outerPar.C[7], outerPar.C[8], outerPar.C[9], outerPar.C[10], outerPar.C[11], outerPar.C[12], outerPar.C[13], outerPar.C[14]})); - if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.enablePID) { + if (param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.enablePID) { PIDResponse pidResponse{}; - auto pid = pidResponse.getMostProbablePID(oTrack, merger.Param().rec.tpc.PID_EKrangeMin, merger.Param().rec.tpc.PID_EKrangeMax, merger.Param().rec.tpc.PID_EPrangeMin, merger.Param().rec.tpc.PID_EPrangeMax, merger.Param().rec.tpc.PID_EDrangeMin, merger.Param().rec.tpc.PID_EDrangeMax, merger.Param().rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); + auto pid = pidResponse.getMostProbablePID(oTrack, param.rec.tpc.PID_EKrangeMin, param.rec.tpc.PID_EKrangeMax, param.rec.tpc.PID_EPrangeMin, param.rec.tpc.PID_EPrangeMax, param.rec.tpc.PID_EDrangeMin, param.rec.tpc.PID_EDrangeMax, param.rec.tpc.PID_ETrangeMin, 
merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); auto pidRemap = merger.Param().rec.tpc.PID_remap[pid]; if (pidRemap >= 0) { pid = pidRemap; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index f5bfbe985fb8c..0d8547263207b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -216,11 +216,15 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - bool dodEdx = param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; - dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); - if (dodEdx) { - dEdx.fillSubThreshold(lastRow - wayDirection); - dEdxAlt.fillSubThreshold(lastRow - wayDirection); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; + dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); + if (dodEdx) { + dEdx.fillSubThreshold(lastRow - wayDirection); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + dEdxAlt.fillSubThreshold(lastRow - wayDirection); + } + } } } @@ -367,31 +371,35 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, 
prop.GetAlpha()); } - if (param.par.dodEdx && param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters - bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; - if (acc || accAlt) { - float qtot = 0, qmax = 0, pad = 0, relTime = 0; - const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; - for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { - if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { - qtot += clustersXYZ[ihit].amp; - } else { - const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; - qtot += cl.qTot; - qmax = CAMath::Max(qmax, cl.qMax); - pad += cl.getPad(); - relTime += cl.getTime(); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters + bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; + if (acc || accAlt) { + float qtot = 0, qmax = 0, pad = 0, relTime = 0; + const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; + for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { + if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { + qtot += clustersXYZ[ihit].amp; + } else { + const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; + qtot += cl.qTot; + qmax = CAMath::Max(qmax, cl.qMax); + pad += cl.getPad(); + relTime += cl.getTime(); + } + } + qtot /= clusterCount; // TODO: Weighted Average + pad /= clusterCount; + relTime /= clusterCount; + relTime = relTime - 
CAMath::Round(relTime); + if (acc) { + dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); + } + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if (accAlt) { + dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); + } } - } - qtot /= clusterCount; // TODO: Weighted Average - pad /= clusterCount; - relTime /= clusterCount; - relTime = relTime - CAMath::Round(relTime); - if (acc) { - dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); - } - if (accAlt) { - dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } } } @@ -428,7 +436,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); - dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + } } Alpha = prop.GetAlpha(); MoveToReference(prop, param, Alpha); From 760f73e62a3a7898c81eee53e1d67012c58c39be Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 21:19:41 +0200 Subject: [PATCH 0344/1764] GPU CMake: If deterministic mode is set to MaxOptO2, do not impose -O2 when BUILD_TYPE is DEBUG --- GPU/GPUTracking/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 2e26622d05291..52848692e7516 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -16,11 +16,12 @@ 
set(MODULE GPUTracking) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") +elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") + else() + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() -elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}") From 89b35ba2d75113e60b2045ed01e169b28d860a07 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 21:22:52 +0200 Subject: [PATCH 0345/1764] GPU: Workaround for Clang Frontend issue This is fixed with Clang >= 20 and C++23 (P2280R4) --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 4 ++++ GPU/GPUTracking/Base/hip/CMakeLists.txt | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index acc77648d954b..67ad608c13417 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,7 +38,11 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) 
__VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + +#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + +#else + std::string("#define GPUCA_RTC_CONSTEXPR\n") + +#endif GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 6eded3499e46e..c89ef1769ad81 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -270,3 +270,8 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() + +set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx" +TARGET_DIRECTORY O2::GPUTrackingHIP +PROPERTIES +COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR") From 4b72f186ec6e2981cae50338b97855efde70dc8a Mon Sep 17 00:00:00 2001 From: tubagundem Date: Tue, 13 May 2025 11:20:40 +0200 Subject: [PATCH 0346/1764] TPC: Fix digitizer workflow to load GEM params from CCDB before creating Polya file --- .../DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx index 75141425f7c49..a04f73a62fbf8 100644 --- a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx +++ b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx @@ -286,7 +286,7 @@ int getNumTPCLanes(std::vector const& sectors, ConfigContext const& configc // ------------------------------------------------------------------ -void initTPC() +void 
initTPC(long timestamp) { // We only want to do this for the DPL master // I am not aware of an easy way to query if "I am DPL master" so @@ -308,6 +308,12 @@ void initTPC() auto& cdb = o2::tpc::CDBInterface::instance(); cdb.setUseDefaults(); + + // IMPORTANT: load ParameterGEM from CCDB + auto& ccdbManager = o2::ccdb::BasicCCDBManager::instance(); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGEM), timestamp); + LOGP(info, "initTPC: TPC GEM param updated for time {}", timestamp); + o2::tpc::ParameterGEM::Instance().printKeyValues(true, true); // by invoking this constructor we make sure that a common file will be created // in future we should take this from OCDB and just forward per message const static auto& ampl = o2::tpc::GEMAmplification::instance(); @@ -592,7 +598,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) if (isEnabled(o2::detectors::DetID::TPC)) { if (!helpasked && ismaster) { - initTPC(); + initTPC(hbfu.startTime); } tpcsectors = o2::RangeTokenizer::tokenize(configcontext.options().get("tpc-sectors")); From 947a1a8cc06cd2e694cb8e5beb561e9392629af5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:12:57 +0200 Subject: [PATCH 0347/1764] GPU TPC: Remove some obsolete code and track members (leftover from Run 2 by Sergey and totally forgotten), which were wasting performance --- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 9 -------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 21 +------------------ 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 578fe1eeb4ca7..6ef2ed2ede668 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -41,9 +41,6 @@ class GPUTPCGMMergedTrack { return mAlpha; } - GPUd() float LastX() const { return mLastX; } - GPUd() float LastY() const { return mLastY; } - GPUd() float LastZ() const { 
return mLastZ; } GPUd() bool OK() const { return mFlags & 0x01; } GPUd() bool Looper() const { return mFlags & 0x02; } GPUd() bool CSide() const { return mFlags & 0x04; } @@ -55,9 +52,6 @@ class GPUTPCGMMergedTrack GPUd() void SetFirstClusterRef(int32_t v) { mFirstClusterRef = v; } GPUd() void SetParam(const GPUTPCGMTrackParam& v) { mParam = v; } GPUd() void SetAlpha(float v) { mAlpha = v; } - GPUd() void SetLastX(float v) { mLastX = v; } - GPUd() void SetLastY(float v) { mLastY = v; } - GPUd() void SetLastZ(float v) { mLastZ = v; } GPUd() void SetOK(bool v) { if (v) { @@ -110,9 +104,6 @@ class GPUTPCGMMergedTrack gputpcgmmergertypes::GPUTPCOuterParam mOuterParam; //* outer param float mAlpha; //* alpha angle - float mLastX; //* outer X - float mLastY; //* outer Y - float mLastZ; //* outer Z uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays // TODO: Change to 8 bit uint32_t mNClusters; //* number of track clusters diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 0d8547263207b..366f75cb05e56 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -1143,26 +1143,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr track.Param() = t; track.Alpha() = Alpha; - if (track.OK()) { - int32_t ind = track.FirstClusterRef(); - const GPUParam& GPUrestrict() param = merger->Param(); - float alphaa = param.Alpha(merger->Clusters()[ind].sector); - float xx, yy, zz; - if (merger->Param().par.earlyTpcTransform) { - xx = merger->ClustersXYZ()[ind].x; - yy = merger->ClustersXYZ()[ind].y; - zz = merger->ClustersXYZ()[ind].z - track.Param().GetTZOffset(); - } else { - const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[merger->Clusters()[ind].num]; - 
merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(merger->Clusters()[ind].sector, merger->Clusters()[ind].row, cl.getPad(), cl.getTime(), xx, yy, zz, track.Param().GetTZOffset()); - } - float sinA, cosA; - CAMath::SinCos(alphaa - track.Alpha(), sinA, cosA); - track.SetLastX(xx * cosA - yy * sinA); - track.SetLastY(xx * sinA + yy * cosA); - track.SetLastZ(zz); - // merger->DebugRefitMergedTrack(track); - } + // if (track.OK()) merger->DebugRefitMergedTrack(track); } GPUd() void GPUTPCGMTrackParam::Rotate(float alpha) From f3f10a25f8abaf9dcf62e764a963f68dfefdc6b4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:17:56 +0200 Subject: [PATCH 0348/1764] GPU TPC: Rename some variables with misleading name --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 4 +-- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../GPUTPCCompressionKernels.cxx | 10 +++--- .../GPUChainTrackingDebugAndProfiling.cxx | 4 +-- .../Global/GPUChainTrackingMerger.cxx | 12 +++---- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 32 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 4 +-- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 14 ++++---- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- .../Merger/GPUTPCGMTracksToTPCSeeds.cxx | 6 ++-- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 4 +-- 12 files changed, 48 insertions(+), 48 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 8796f063abdc5..d3dd561dcea2f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -109,13 +109,13 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) 
{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 82834a694d0ba..ec1636dfe7f59 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -125,7 +125,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? 
Could remove GPUConstantMem.h include - mMaxTracks = mRec->GetConstantMem().tpcMerger.NOutputTracks(); + mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index bba97e9eace9b..73b195e8f4fe4 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -201,7 +201,7 @@ GPUdii() void GPUTPCCompressionKernels::ThreadclusterOffset[iSector][iRow]; - const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; + const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; // 32 bit enough for number of clusters per row * 1024 const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSector])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal if (iThread == nThreads - 1) { smem.nCount = 0; @@ -214,7 +214,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->nClusters[iSector][iRow]); for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) { const int32_t idx = idOffset + i; - int32_t cidx = 0; + int32_t storeCluster = 0; do { if (i >= clusters->nClusters[iSector][iRow]) { break; @@ -239,13 +239,13 @@ GPUdii() void GPUTPCCompressionKernels::ThreadtpcTrackers[i].NTrackHits(), processors()->tpcTrackers[i].NMaxTrackHits()); } addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits)); - addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NOutputTracks(), processors()->tpcMerger.NMaxTracks()); + addToMap("TPC Tracks", usageMap, 
processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks()); addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters()); if (mRec->GetProcessingSettings().createO2Output) { @@ -181,7 +181,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL SectorTracks NCl %d NTrk %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTracks()); GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits()); } - GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTracks()); + GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks()); GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 6e86be03e7950..bd1fa7796dadf 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -220,7 +220,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mOutputQueue.clear(); } - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); + runKernel(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); if (param().rec.tpc.retryRefit == 1) { runKernel(GetGridAuto(0), -1); } @@ -233,7 +233,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); if (param().rec.tpc.mergeLoopersAfterburner) { - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); + runKernel(doGPU ? 
GetGrid(Merger.NMergedTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); if (doGPU) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0); SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel @@ -255,10 +255,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) throw std::runtime_error("QA Scratch buffer exceeded"); } } - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); + GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; if (param().dodEdxEnabled) { - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { @@ -326,7 +326,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } mIOPtrs.mergedTracks = Merger.OutputTracks(); - mIOPtrs.nMergedTracks = Merger.NOutputTracks(); + mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); @@ -340,7 +340,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (doGPU) { processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks(); - processorsShadow()->ioPtrs.nMergedTracks = 
Merger.NOutputTracks(); + processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index d2aba503be6a6..e96bbeee774bf 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -415,7 +415,7 @@ int32_t GPUTPCGMMerger::CheckSectors() GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output) { - const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal(); for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) { mTrackLinks[i] = -1; } @@ -1271,7 +1271,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const ClusterNative* cls = Param().par.earlyTpcTransform ? 
nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear; - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) { continue; @@ -1392,7 +1392,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } } - // for (int32_t i = 0;i < mMemory->nOutputTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks + // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks } namespace o2::gpu::internal @@ -1533,7 +1533,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = 0; for (int32_t ipart = 0; ipart < nParts; ipart++) { const GPUTPCGMSectorTrack* t = trackParts[ipart]; - CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nOutputTracks, ipart, t->QPt(), t->DzDs())); + CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nMergedTracks, ipart, t->QPt(), t->DzDs())); int32_t nTrackHits = t->NClusters(); trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; for (int32_t i = 0; i < nTrackHits; i++, c2--) { @@ -1678,10 +1678,10 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread cl[i].leg = trackClusters[i].leg; } - uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nOutputTracks, 1u); + uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u); if (iOutputTrack >= mNMaxTracks) { raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks); - CAMath::AtomicExch(&mMemory->nOutputTracks, mNMaxTracks); + 
CAMath::AtomicExch(&mMemory->nMergedTracks, mNMaxTracks); continue; } @@ -1718,9 +1718,9 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; } - // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nOutputTracks, p1.QPt(), nParts, nHits); + // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nMergedTracks, p1.QPt(), nParts, nHits); - /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nOutputTracks)) + /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nMergedTracks)) { mergedTrack.SetOK(0); mergedTrack.SetNClusters(0); @@ -1742,14 +1742,14 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackOrderProcess[i] = i; } } GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackSort[i] = i; } } @@ -1784,7 +1784,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ ) // clang-format on }; - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, comp); #endif } @@ -1810,13 +1810,13 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int ) // 
clang-format on }; - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, comp); #endif } GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; if (trk.OK()) { @@ -1848,7 +1848,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackSort[mTrackOrderAttach[i]] = i; } for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { @@ -1858,7 +1858,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; if (!trk.OK() || trk.NClusters() == 0) { continue; @@ -1893,7 +1893,7 @@ GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const float lowPtThresh = 
Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold - for (uint32_t i = get_global_id(0); i < mMemory->nOutputTracks; i += get_global_size(0)) { + for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { const auto& trk = mOutputTracks[i]; const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 6c6e0e02a2dc2..6c9c14b557798 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -69,7 +69,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nRetryRefit; GPUAtomic(uint32_t) nLoopData; GPUAtomic(uint32_t) nUnpackedTracks; - GPUAtomic(uint32_t) nOutputTracks; + GPUAtomic(uint32_t) nMergedTracks; GPUAtomic(uint32_t) nOutputTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; @@ -103,7 +103,7 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersOutputState(void* mem); void* SetPointersMemory(void* mem); - GPUhdi() int32_t NOutputTracks() const { return mMemory->nOutputTracks; } + GPUhdi() int32_t NMergedTracks() const { return mMemory->nMergedTracks; } GPUhdi() const GPUTPCGMMergedTrack* OutputTracks() const { return mOutputTracks; } GPUhdi() GPUTPCGMMergedTrack* OutputTracks() { return mOutputTracks; } GPUhdi() const GPUdEdxInfo* OutputTracksdEdx() const { return mOutputTracksdEdx; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index ac55f423b1c42..02d0ac98b05b0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -94,7 +94,7 @@ void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSector, in void GPUTPCGMMerger::DumpTrackLinks(std::ostream& out, bool output, const char* type) const { out << "\nTPC Merger Links " << type 
<< "\n"; - const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal(); for (int32_t i = 0; i < n; i++) { if (mTrackLinks[i] != -1) { out << " " << i << ": " << mTrackLinks[i] << "\n"; @@ -138,7 +138,7 @@ void GPUTPCGMMerger::DumpCollected(std::ostream& out) const std::streamsize ss = out.precision(); out << std::setprecision(2); out << "\nTPC Merger Collected Tracks\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; const auto& p = trk.GetParam(); out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << "\n"; @@ -150,7 +150,7 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const { DumpTrackLinks(out, true, " for CE merging"); out << "\nTPC Merger Merge CE\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; if (trk.CCE()) { out << " Track " << i << ": CCE\n"; @@ -162,11 +162,11 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const { out << "\nTPC Merger Refit Prepare\n"; out << " Sort\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { out << " " << i << ": " << mTrackOrderAttach[i] << "\n"; } out << " Clusters\n"; - for (uint32_t j = 0; j < mMemory->nOutputTracks; j++) { + for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { const auto& trk = mOutputTracks[j]; out << " Track " << j << ": "; for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { @@ -195,7 +195,7 @@ void 
GPUTPCGMMerger::DumpRefit(std::ostream& out) const std::streamsize ss = out.precision(); out << std::setprecision(2); out << "\nTPC Merger Refit\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; if (trk.NClusters() == 0) { continue; @@ -212,7 +212,7 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; - for (uint32_t j = 0; j < mMemory->nOutputTracks; j++) { + for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { const auto& trk = mOutputTracks[j]; if (trk.NClusters() == 0) { continue; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index d72d59a6250e7..68763b3549547 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -21,7 +21,7 @@ using namespace o2::gpu; template <> GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode) { - const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NOutputTracks(); + const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NMergedTracks(); GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, iEnd, { const int32_t i = mode == -1 ? merger.RetryRefitIds()[ii] : mode ? 
merger.TrackOrderProcess()[ii] : ii; GPUTPCGMTrackParam::RefitTrack(merger.OutputTracks()[i], i, &merger, mode == -1); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9ead17ea5c7c0..72e9f63e5da83 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -38,7 +38,7 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); - const uint32_t nTracks = merger.NOutputTracks(); + const uint32_t nTracks = merger.NMergedTracks(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); const GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx index 78eea63edecdd..ebc9d22560524 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx @@ -34,7 +34,7 @@ void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPC } seeds->Clear(); int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; @@ -112,7 +112,7 @@ void GPUTPCGMTracksToTPCSeeds::UpdateParamsOuter(TObjArray* seeds) return; } int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; @@ -134,7 +134,7 @@ void GPUTPCGMTracksToTPCSeeds::UpdateParamsInner(TObjArray* seeds) return; } int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 
0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index a21593b7ba9e9..e63bb82a9b09e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -100,7 +100,7 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread Date: Tue, 13 May 2025 10:25:11 +0200 Subject: [PATCH 0349/1764] GPU: Add additional optional debbug dumps for validation --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- .../Global/GPUChainTrackingDebug.h | 45 ++++++++++--------- .../Global/GPUChainTrackingMerger.cxx | 7 +-- .../Global/GPUChainTrackingSectorTracker.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 15 ++++++- 7 files changed, 47 insertions(+), 28 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9e0aa32155f0d..9400a429fca81 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -295,7 +295,7 @@ AddOption(trdNCandidates, int32_t, 3, "", 0, "Number of branching track candidat AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GPU track model for TRD tracking") AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))") AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") -AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file") +AddOption(debugMask, uint32_t, (1 << 18) - 1, "debugMask", 0, "Mask for debug output dumps to file") AddOption(debugLogSuffix, 
std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h index 810f40a1d8654..6c995f65f3dd3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -23,28 +23,29 @@ namespace o2::gpu { // NOTE: Values below 262144 are activated by default with --debug 6 in GPUSettingsList.h::debugMask enum GPUChainTrackingDebugFlags : uint32_t { - TPCSectorTrackingData = 1, - TPCPreLinks = 2, - TPCLinks = 4, - TPCStartHits = 8, - TPCTracklets = 16, - TPCSectorTracks = 32, - TPCHitWeights = 256, - TPCCompressedClusters = 512, - TPCDecompressedClusters = 1024, - TPCMergingRanges = 2048, - TPCMergingSectorTracks = 4096, - TPCMergingMergedTracks = 8192, - TPCMergingCollectedTracks = 16384, - TPCMergingCE = 32768, - TPCMergingRefit = 65536, - TPCClustererClusters = 131072, - TPCClusterer = 262144, - TPCClustererDigits = 262144 << 1, - TPCClustererPeaks = 262144 << 2, - TPCClustererSuppressedPeaks = 262144 << 3, - TPCClustererChargeMap = 262144 << 4, - TPCClustererZeroedCharges = 262144 << 5 + TPCSectorTrackingData = 1 << 0, + TPCPreLinks = 1 << 1, + TPCLinks = 1 << 2, + TPCStartHits = 1 << 3, + TPCTracklets = 1 << 4, + TPCSectorTracks = 1 << 5, + TPCHitWeights = 1 << 6, + TPCMergingRanges = 1 << 7, + TPCMergingSectorTracks = 1 << 8, + TPCMergingMatching = 1 << 9, + TPCMergingCollectedTracks = 1 << 10, + TPCMergingCE = 1 << 11, + TPCMergingPrepareFit = 1 << 12, + TPCMergingRefit = 1 << 13, + TPCMergingLoopers = 1 << 14, + TPCCompressedClusters = 1 << 15, + TPCDecompressedClusters = 1 << 16, + TPCClustererClusters = 1 << 17, + TPCClustererDigits = 1 << 18, + 
TPCClustererPeaks = 1 << 19, + TPCClustererSuppressedPeaks = 1 << 20, + TPCClustererChargeMap = 1 << 21, + TPCClustererZeroedCharges = 1 << 22 }; template diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index bd1fa7796dadf..df80eabfb8761 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -143,7 +143,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -158,7 +158,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -202,7 +202,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool 
synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingPrepareFit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); if (doGPU) { CondWaitEvent(waitForTransfer, &mEvents->single); @@ -240,6 +240,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } runKernel(GetGridAuto(0, deviceType)); runKernel(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingLoopers, Merger, &GPUTPCGMMerger::DumpLoopers, *mDebugFile); } DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index ef38d53173c2b..67ef402961a20 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -176,7 +176,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } if (GetProcessingSettings().debugLevel >= 6) { - *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + if ((GetProcessingSettings().debugMask & 63)) { + *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + } if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCSectorTrackingData) { if (doGPU) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 6ef2ed2ede668..73b14ba1b2fdf 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -95,6 +95,7 @@ class GPUTPCGMMergedTrack GPUd() void SetFlags(uint8_t v) { mFlags = v; } GPUd() void SetLegs(uint8_t v) { mLegs = v; } GPUd() uint8_t Legs() const { return mLegs; } + GPUd() uint8_t Flags() const { return mFlags; } GPUd() const gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() const { return mOuterParam; } GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 6c9c14b557798..ae85f20b17b48 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -201,6 +201,7 @@ class GPUTPCGMMerger : public GPUProcessor void DumpFitPrepare(std::ostream& out) const; void DumpRefit(std::ostream& out) const; void DumpFinal(std::ostream& out) const; + void DumpLoopers(std::ostream& out) const; template void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 02d0ac98b05b0..3be32a2d87610 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -204,11 +204,24 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? 
mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) - << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << "\n"; + << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] + << " NFitted " << trk.NClustersFitted() << " legs " << (int)trk.Legs() << " flags " << (int)trk.Flags() << "\n"; } out << std::setprecision(ss); } +void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const +{ + out << "\n TPC Merger Looper Afterburner\n"; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + if (i && i % 100 == 0) { + out << "\n"; + } + out << (int)mOutputTracks[i].MergedLooper() << " "; + } + out << "\n"; +} + void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; From d20b540fb0b5693ad6fa39b128d589160c91163f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:25:47 +0200 Subject: [PATCH 0350/1764] GPU TPC: Fix deterministic mode for TPC cluster compression / decompression / looper merging afterburner --- .../Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc | 4 ++-- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index d3dd561dcea2f..1d633eb5e748f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -82,14 +82,14 @@ struct GPUTPCGMMergerSortTracksQPt_comp { struct GPUTPCGMMergerMergeLoopers_comp { GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) { - return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); + return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? 
CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); } }; struct GPUTPCGMO2OutputSort_comp { GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) { - return (a.y > b.y); + return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); } }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index e96bbeee774bf..99ef548b2d78e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1948,7 +1948,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, if (iThread || iBlock) { return; } - auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); }; + auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); }; GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp); #endif } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 72e9f63e5da83..624c9ab487c8d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -88,7 +88,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, return; } GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); - auto comp = [](const auto& a, const auto& b) { return (a.y > b.y); }; + auto comp = [](const auto& a, const auto& b) { return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? 
a.y > b.y : a.x > b.x, a.y > b.y); }; GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, comp); #endif } From 7732f5c426049d1eba9711b31626d4fb86d701b7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:51:13 +0200 Subject: [PATCH 0351/1764] GPU: Deduplicate sort comparisons: Use structs, since both hipcub and rocthrust do not work with lambdas for some reason --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 82 ---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 143 ++++++++++-------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 20 ++- 3 files changed, 98 insertions(+), 147 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 1d633eb5e748f..44cde3d4ac48a 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -14,88 +14,6 @@ #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct MergeBorderTracks_compMax { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); - } -}; -struct MergeBorderTracks_compMin { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); - } -}; - -struct GPUTPCGMMergerSortTracks_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerSortTracksQPt_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerMergeLoopers_comp { - GPUd() bool operator()(const 
MergeLooperParam& a, const MergeLooperParam& b) - { - return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); - } -}; - -struct GPUTPCGMO2OutputSort_comp { - GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) - { - return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); - } -}; - -} // anonymous namespace -} // namespace o2::gpu::internal - template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 99ef548b2d78e..b12375a10023a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -59,17 +59,13 @@ #include "SimulationDataFormat/MCCompLabel.h" #endif -namespace o2::gpu::internal -{ -} +static constexpr int32_t kMaxParts = 400; +static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; + using namespace o2::gpu; -using namespace o2::gpu::internal; using namespace o2::tpc; using namespace gputpcgmmergertypes; -static constexpr int32_t kMaxParts = 400; -static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; - namespace o2::gpu::internal { struct MergeLooperParam { @@ -78,8 +74,79 @@ struct MergeLooperParam { float y; uint32_t id; }; + +struct MergeBorderTracks_compMax { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); + } +}; +struct MergeBorderTracks_compMin { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); + } +}; + +struct GPUTPCGMMergerSortTracks_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + if (a.CCE() != b.CCE()) { + return a.CCE() > b.CCE(); + } + if (a.Legs() != b.Legs()) { + return a.Legs() > b.Legs(); + } + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE + return a.NClusters() > b.NClusters(); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerSortTracksQPt_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerMergeLoopers_comp { + GPUd() bool operator()(const 
MergeLooperParam& a, const MergeLooperParam& b) + { + return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); + } +}; + } // namespace o2::gpu::internal +using namespace o2::gpu::internal; + #ifndef GPUCA_GPUCODE #include "GPUQA.h" @@ -742,11 +809,11 @@ template <> GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread == 0) { + if (iThread == 0 && iBlock == 0) { if (cmpMax) { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMax()); } else { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMin()); } } #endif @@ -1757,60 +1824,18 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mOutputTracks)); } - // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object - auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { - const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - }; - - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, comp); #endif } GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort 
+ mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mOutputTracks)); } - // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object - auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { - const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - }; - - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, comp); #endif } @@ -1945,11 +1970,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } - auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? 
CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); }; - GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp); #endif } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 624c9ab487c8d..1e08058fb22dd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -34,6 +34,18 @@ using namespace o2::tpc::constants; GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit; } GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return rec.tpc.dropSecondaryLegsInOutput ? gputpcgmmergertypes::attachGoodLeg : gputpcgmmergertypes::attachZero; } +namespace o2::gpu::internal +{ + +struct GPUTPCGMO2OutputSort_comp { + GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) + { + return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? 
a.y > b.y : a.x > b.x, a.y > b.y); + } +}; + +} // namespace o2::gpu::internal + template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { @@ -84,12 +96,10 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); + GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, internal::GPUTPCGMO2OutputSort_comp()); } - GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); - auto comp = [](const auto& a, const auto& b) { return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); }; - GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, comp); #endif } From f75693ddbe0b19eb445da5a9d9972f73fdd86b96 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 11:04:35 +0200 Subject: [PATCH 0352/1764] GPU: Remove obsolete files used for tests in Run 2 --- .../Merger/GPUTPCGMTracksToTPCSeeds.cxx | 149 ------------------ .../Merger/GPUTPCGMTracksToTPCSeeds.h | 29 ---- 2 files changed, 178 deletions(-) delete mode 100644 GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx delete mode 100644 GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx deleted file mode 100644 index ebc9d22560524..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
-// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCGMTracksToTPCSeeds.cxx -/// \author David Rohr - -#include "GPUTPCGMTracksToTPCSeeds.h" -#include "GPUTPCGlobalMergerComponent.h" -#include "GPUTPCGMMergerTypes.h" -#include "GPUTPCGMMerger.h" -#include "GPULogging.h" -#include "AliTPCtracker.h" -#include "AliTPCtrack.h" -#include "AliTPCseed.h" -#include "AliTPCtrackerSector.h" -#include "TObjArray.h" -#include "AliTPCclusterMI.h" - -using namespace o2::gpu; - -void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - seeds->Clear(); - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - - AliTPCtrack tr; - tr.Set(track.GetParam().GetX(), track.GetAlpha(), track.GetParam().GetPar(), track.GetParam().GetCov()); - AliTPCseed* seed = new (tpctracker->NextFreeSeed()) AliTPCseed(tr); - for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - seed->SetClusterPointer(j, nullptr); - seed->SetClusterIndex(j, -1); - } - int32_t ncls = 0; - int32_t lastrow = -1; - int32_t lastleg = -1; - for (int32_t j = track.NClusters() - 1; j >= 0; j--) { - const GPUTPCGMMergedTrackHit& cls = merger->Clusters()[track.FirstClusterRef() + j]; - if (cls.state & GPUTPCGMMergedTrackHit::flagReject) { - continue; - } - if (lastrow != -1 && (cls.row < lastrow 
|| cls.leg != lastleg)) { - break; - } - if (cls.row == lastrow) { - continue; - } - - AliTPCtrackerRow& row = tpctracker->GetRow(cls.sector % 18, cls.row); - uint32_t clIndexOffline = 0; - AliTPCclusterMI* clOffline = row.FindNearest2(cls.y, cls.z, 0.01f, 0.01f, clIndexOffline); - if (!clOffline) { - continue; - } - clIndexOffline = row.GetIndex(clIndexOffline); - - clOffline->Use(10); - seed->SetClusterPointer(cls.row, clOffline); - seed->SetClusterIndex2(cls.row, clIndexOffline); - - lastrow = cls.row; - lastleg = cls.leg; - ncls++; - } - - seed->SetRelativeSector(track.GetAlpha() / (M_PI / 9.f)); - seed->SetNumberOfClusters(ncls); - seed->SetNFoundable(ncls); - seed->SetChi2(track.GetParam().GetChi2()); - - float alpha = seed->GetAlpha(); - if (alpha >= 2.f * M_PI) { - alpha -= 2.f * M_PI; - } - if (alpha < 0) { - alpha += 2.f * M_PI; - } - seed->SetRelativeSector(track.GetAlpha() / (M_PI / 9.f)); - - seed->SetPoolID(tpctracker->GetLastSeedId()); - seed->SetIsSeeding(kTRUE); - seed->SetSeed1(GPUCA_ROW_COUNT - 1); - seed->SetSeed2(GPUCA_ROW_COUNT - 2); - seed->SetSeedType(0); - seed->SetFirstPoint(-1); - seed->SetLastPoint(-1); - seeds->AddLast(seed); // note, track is seed, don't free the seed - index++; - } -} - -void GPUTPCGMTracksToTPCSeeds::UpdateParamsOuter(TObjArray* seeds) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - if (index > seeds->GetEntriesFast()) { - GPUError("Invalid number of offline seeds"); - return; - } - AliTPCseed* seed = (AliTPCseed*)seeds->UncheckedAt(index++); - const gputpcgmmergertypes::GPUTPCOuterParam& param = track.OuterParam(); - seed->Set(param.X, param.alpha, param.P, param.C); - } -} - -void GPUTPCGMTracksToTPCSeeds::UpdateParamsInner(TObjArray* seeds) -{ - const 
GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - if (index > seeds->GetEntriesFast()) { - GPUError("Invalid number of offline seeds"); - return; - } - AliTPCseed* seed = (AliTPCseed*)seeds->UncheckedAt(index++); - seed->Set(track.GetParam().GetX(), track.GetAlpha(), track.GetParam().GetPar(), track.GetParam().GetCov()); - } -} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h deleted file mode 100644 index 029cb108d4119..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCGMTracksToTPCSeeds.h -/// \author David Rohr - -#ifndef GPUTPCGMTRACKSTOTPCSEEDS_H -#define GPUTPCGMTRACKSTOTPCSEEDS_H - -class TObjArray; -class AliTPCtracker; - -class GPUTPCGMTracksToTPCSeeds -{ - public: - static void CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker); - static void UpdateParamsOuter(TObjArray* seeds); - static void UpdateParamsInner(TObjArray* seeds); -}; - -#endif From 747fb860184729b2d219e7b0a044d09e15c7a1b5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 11:04:56 +0200 Subject: [PATCH 0353/1764] GPU TPC: Some more member variable renaming --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 4 +- .../Global/GPUChainTrackingMerger.cxx | 8 ++-- .../Global/GPUChainTrackingRefit.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 38 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 18 ++++----- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 14 +++---- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 10 ++--- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 12 +++--- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 10 ++--- 10 files changed, 59 insertions(+), 59 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 44cde3d4ac48a..85567d70d70d6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -27,13 +27,13 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), 
GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.MergedTracks())); } template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.MergedTracks())); } template <> diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index df80eabfb8761..2b3d719a27dea 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -256,10 +256,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) throw std::runtime_error("QA Scratch buffer exceeded"); } } - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); + GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; if (param().dodEdxEnabled) { - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), 
Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { @@ -326,7 +326,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mRec->ReturnVolatileDeviceMemory(); } - mIOPtrs.mergedTracks = Merger.OutputTracks(); + mIOPtrs.mergedTracks = Merger.MergedTracks(); mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); @@ -340,7 +340,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC(); if (doGPU) { - processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks(); + processorsShadow()->ioPtrs.mergedTracks = MergerShadow.MergedTracks(); processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 4662b5464f710..5ca20a39d0462 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -33,7 +33,7 @@ int32_t GPUChainTracking::RunRefit() SetupGPUProcessor(&Refit, false); RefitShadow.SetPtrsFromGPUConstantMem(processorsShadow(), doGPU ? &processorsDevice()->param : nullptr); RefitShadow.SetPropagator(doGPU ? processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); - RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.OutputTracks(); + RefitShadow.mPTracks = (doGPU ? 
processorsShadow() : processors())->tpcMerger.MergedTracks(); WriteToConstantMemory(RecoStep::Refit, (char*)&processors()->trackingRefit - (char*)processors(), &RefitShadow, sizeof(RefitShadow), 0); // TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); if (param().rec.trackingRefitGPUModel) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index b12375a10023a..f1a0816529c3a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -365,11 +365,11 @@ void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) void* GPUTPCGMMerger::SetPointersOutput(void* mem) { - computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracks, mNMaxTracks); if (mRec->GetParam().dodEdxEnabled) { - computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracksdEdx, mNMaxTracks); if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) { - computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks); } } computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); @@ -1318,7 +1318,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row); if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) { b.SetTrackID(itr); - b.SetNClusters(mOutputTracks[itr].NClusters()); + b.SetNClusters(mMergedTracks[itr].NClusters()); if (CAMath::Abs(b.Cov()[4]) >= 0.5f) { b.SetCov(4, 0.5f); // TODO: Is this needed and better than the cut in BorderTrack? } @@ -1339,11 +1339,11 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i { const ClusterNative* cls = Param().par.earlyTpcTransform ? 
nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear; for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { - if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { + if (mMergedTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) { continue; } - GPUTPCGMMergedTrack* trk[2] = {&mOutputTracks[i], &mOutputTracks[mTrackLinks[i]]}; + GPUTPCGMMergedTrack* trk[2] = {&mMergedTracks[i], &mMergedTracks[mTrackLinks[i]]}; if (!trk[1]->OK() || trk[1]->CCE()) { continue; @@ -1459,7 +1459,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } } - // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks + // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mMergedTracks[i].CCE() == false) {mMergedTracks[i].SetNClusters(0);mMergedTracks[i].SetOK(false);}} //Remove all non-CE tracks } namespace o2::gpu::internal @@ -1752,7 +1752,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread continue; } - GPUTPCGMMergedTrack& mergedTrack = mOutputTracks[iOutputTrack]; + GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack]; mergedTrack.SetFlags(0); mergedTrack.SetOK(1); @@ -1825,7 +1825,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS if (iThread == 0 && iBlock == 0) { - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mOutputTracks)); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mMergedTracks)); } #endif } @@ -1834,7 +1834,7 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int { #ifndef 
GPUCA_SPECIALIZE_THRUST_SORTS if (iThread == 0 && iBlock == 0) { - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mOutputTracks)); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mMergedTracks)); } #endif } @@ -1843,7 +1843,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; - const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (trk.OK()) { for (uint32_t j = 0; j < trk.NClusters(); j++) { mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = attachAttached | attachGood; @@ -1884,7 +1884,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { - const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (!trk.OK() || trk.NClusters() == 0) { continue; } @@ -1919,7 +1919,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, { const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { @@ -1983,7 +1983,7 @@ GPUd() void 
GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, #if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE) std::vector paramLabels(mMemory->nLooperMatchCandidates); for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) { - paramLabels[i] = GetTrackLabel(mOutputTracks[params[i].id]); + paramLabels[i] = GetTrackLabel(mMergedTracks[params[i].id]); } /*std::vector dropped(mMemory->nLooperMatchCandidates); std::vector droppedMC(mMemory->nLooperMatchCandidates); @@ -2005,8 +2005,8 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, // bs |= 1; continue; } - const auto& trk1 = mOutputTracks[params[i].id]; - const auto& trk2 = mOutputTracks[params[j].id]; + const auto& trk1 = mMergedTracks[params[i].id]; + const auto& trk2 = mMergedTracks[params[j].id]; const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { @@ -2045,7 +2045,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const int64_t label2 = paramLabels[j]; bool labelEQ = label1 != -1 && label1 == label2; if (1 || EQ || labelEQ) { - // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mOutputTracks[params[i].id].CSide(), (int32_t)mOutputTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, params[i].y, params[j].y); + // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mMergedTracks[params[i].id].CSide(), (int32_t)mMergedTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, 
params[i].y, params[j].y); static auto& tup = GPUROOTDump::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs"); tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz), dzcorr, dtgl, dqpt, dznorm, bs); static auto tup2 = GPUROOTDump::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr"); @@ -2063,9 +2063,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, }*/ #endif if (EQ) { - mOutputTracks[params[j].id].SetMergedLooper(true); + mMergedTracks[params[j].id].SetMergedLooper(true); if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) { - mOutputTracks[params[i].id].SetMergedLooper(true); + mMergedTracks[params[i].id].SetMergedLooper(true); } } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index ae85f20b17b48..4487b6d937dc2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -104,12 +104,12 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersMemory(void* mem); GPUhdi() int32_t NMergedTracks() const { return mMemory->nMergedTracks; } - GPUhdi() const GPUTPCGMMergedTrack* OutputTracks() const { return mOutputTracks; } - GPUhdi() GPUTPCGMMergedTrack* OutputTracks() { return mOutputTracks; } - GPUhdi() const GPUdEdxInfo* OutputTracksdEdx() const { return mOutputTracksdEdx; } - GPUhdi() GPUdEdxInfo* OutputTracksdEdx() { return mOutputTracksdEdx; } - GPUhdi() const GPUdEdxInfo* OutputTracksdEdxAlt() const { return mOutputTracksdEdxAlt; } - GPUhdi() GPUdEdxInfo* OutputTracksdEdxAlt() { return mOutputTracksdEdxAlt; } + GPUhdi() const GPUTPCGMMergedTrack* MergedTracks() const { return mMergedTracks; } + GPUhdi() GPUTPCGMMergedTrack* MergedTracks() { return 
mMergedTracks; } + GPUhdi() const GPUdEdxInfo* MergedTracksdEdx() const { return mMergedTracksdEdx; } + GPUhdi() GPUdEdxInfo* MergedTracksdEdx() { return mMergedTracksdEdx; } + GPUhdi() const GPUdEdxInfo* MergedTracksdEdxAlt() const { return mMergedTracksdEdxAlt; } + GPUhdi() GPUdEdxInfo* MergedTracksdEdxAlt() { return mMergedTracksdEdxAlt; } GPUhdi() uint32_t NClusters() const { return mNClusters; } GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; } GPUhdi() uint32_t NMaxTracks() const { return mNMaxTracks; } @@ -262,9 +262,9 @@ class GPUTPCGMMerger : public GPUProcessor uint16_t mMemoryResOutputO2Scratch = (uint16_t)-1; int32_t mNClusters = 0; // Total number of incoming clusters (from sector tracks) - GPUTPCGMMergedTrack* mOutputTracks = nullptr; //* array of output merged tracks - GPUdEdxInfo* mOutputTracksdEdx = nullptr; //* dEdx information - GPUdEdxInfo* mOutputTracksdEdxAlt = nullptr; //* dEdx alternative information + GPUTPCGMMergedTrack* mMergedTracks = nullptr; //* array of output merged tracks + GPUdEdxInfo* mMergedTracksdEdx = nullptr; //* dEdx information + GPUdEdxInfo* mMergedTracksdEdxAlt = nullptr; //* dEdx alternative information GPUTPCGMSectorTrack* mSectorTrackInfos = nullptr; //* additional information for sector tracks int32_t* mSectorTrackInfoIndex = nullptr; GPUTPCGMMergedTrackHit* mClusters = nullptr; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 3be32a2d87610..9c924e74ec519 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -139,7 +139,7 @@ void GPUTPCGMMerger::DumpCollected(std::ostream& out) const out << std::setprecision(2); out << "\nTPC Merger Collected Tracks\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << 
trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << "\n"; } @@ -151,7 +151,7 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const DumpTrackLinks(out, true, " for CE merging"); out << "\nTPC Merger Merge CE\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; if (trk.CCE()) { out << " Track " << i << ": CCE\n"; } @@ -167,7 +167,7 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const } out << " Clusters\n"; for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { - const auto& trk = mOutputTracks[j]; + const auto& trk = mMergedTracks[j]; out << " Track " << j << ": "; for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { out << j << "/" << (i - trk.FirstClusterRef()) << ": " << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", "; @@ -196,14 +196,14 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const out << std::setprecision(2); out << "\nTPC Merger Refit\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; if (trk.NClusters() == 0) { continue; } const auto& p = trk.GetParam(); const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() - << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? 
mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) + << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxMaxTPC : -1.f) << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << " NFitted " << trk.NClustersFitted() << " legs " << (int)trk.Legs() << " flags " << (int)trk.Flags() << "\n"; } @@ -217,7 +217,7 @@ void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const if (i && i % 100 == 0) { out << "\n"; } - out << (int)mOutputTracks[i].MergedLooper() << " "; + out << (int)mMergedTracks[i].MergedLooper() << " "; } out << "\n"; } @@ -226,7 +226,7 @@ void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { - const auto& trk = mOutputTracks[j]; + const auto& trk = mMergedTracks[j]; if (trk.NClusters() == 0) { continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index 68763b3549547..1631777d80482 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -24,7 +24,7 @@ GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThread const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NMergedTracks(); GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, iEnd, { const int32_t i = mode == -1 ? merger.RetryRefitIds()[ii] : mode ? 
merger.TrackOrderProcess()[ii] : ii; - GPUTPCGMTrackParam::RefitTrack(merger.OutputTracks()[i], i, &merger, mode == -1); + GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger, mode == -1); }); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 1e08058fb22dd..eb22ca49e9242 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -49,10 +49,10 @@ struct GPUTPCGMO2OutputSort_comp { template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); + const GPUTPCGMMergedTrack* tracks = merger.MergedTracks(); const uint32_t nTracks = merger.NMergedTracks(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); - const GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); + const GPUdEdxInfo* tracksdEdx = merger.MergedTracksdEdx(); constexpr uint8_t flagsReject = getFlagsReject(); const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); @@ -107,9 +107,9 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { constexpr float MinDelta = 0.1f; - const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); - GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); - GPUdEdxInfo* tracksdEdxAlt = merger.OutputTracksdEdxAlt(); + const GPUTPCGMMergedTrack* tracks = merger.MergedTracks(); + GPUdEdxInfo* tracksdEdx = merger.MergedTracksdEdx(); + GPUdEdxInfo* tracksdEdxAlt = merger.MergedTracksdEdxAlt(); const int32_t nTracks = merger.NOutputTracksTPCO2(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); constexpr uint8_t flagsReject = getFlagsReject(); diff --git 
a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 366f75cb05e56..4b616fce83f5f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -94,10 +94,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ storeOuter = 0; if (iWay == nWays - 1) { StoreOuter(outerParam, prop, 0); - if (merger->OutputTracks()[iTrk].Looper()) { + if (merger->MergedTracks()[iTrk].Looper()) { storeOuter = 1; } - } else if (iWay == nWays - 2 && merger->OutputTracks()[iTrk].Looper()) { + } else if (iWay == nWays - 2 && merger->MergedTracks()[iTrk].Looper()) { storeOuter = 2; } } @@ -435,9 +435,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // TODO: we have looping tracks here with 0 accepted clusters in the primary leg. In that case we should refit the track using only the primary leg. if (param.par.dodEdx && param.dodEdxEnabled) { - dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); + dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { - dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + dEdxAlt.computedEdx(merger->MergedTracksdEdxAlt()[iTrk], param); } } Alpha = prop.GetAlpha(); @@ -596,7 +596,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return -1e6f; } - const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->OutputTracks()[iTrack].CSide() ^ (sector >= 18)) ? -mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); + const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->MergedTracks()[iTrack].CSide() ^ (sector >= 18)) ? 
-mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); const float y0 = row.Grid().YMin(); const float stepY = row.HstepY(); const float z0 = row.Grid().ZMin() - zOffset; // We can use our own ZOffset, since this is only used temporarily anyway @@ -1136,7 +1136,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr t.QPt() = 1.e-4f; } - CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->OutputTracks()[iTrk].Looper()); }); + CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->MergedTracks()[iTrk].Looper()); }); track.SetOK(ok); track.SetNClustersFitted(nTrackHits); diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index e63bb82a9b09e..5af3ebb51b9d6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -105,8 +105,8 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread= 0) { int32_t firstIdx = j; - auto firstItem = merger.OutputTracks()[firstIdx]; + auto firstItem = merger.MergedTracks()[firstIdx]; int32_t currIdx = firstIdx; int32_t sourceIdx = tmp[currIdx]; tmp2[sourceIdx] = currIdx; do { tmp[currIdx] = -1; - merger.OutputTracks()[currIdx] = merger.OutputTracks()[sourceIdx]; + merger.MergedTracks()[currIdx] = merger.MergedTracks()[sourceIdx]; currIdx = sourceIdx; sourceIdx = tmp[currIdx]; tmp2[sourceIdx] = currIdx; } while (sourceIdx != firstIdx); tmp[currIdx] = -1; - merger.OutputTracks()[currIdx] = firstItem; + merger.MergedTracks()[currIdx] = firstItem; } } } From b0414adf124ee153273fd9fefc7080690119eb89 Mon Sep 17 00:00:00 2001 From: Giulio 
Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 23:00:50 +0200 Subject: [PATCH 0354/1764] Out of line FairTBuffer destructor (#14265) --- Framework/Core/include/Framework/TMessageSerializer.h | 3 +++ Framework/Core/src/TMessageSerializer.cxx | 3 +++ 2 files changed, 6 insertions(+) diff --git a/Framework/Core/include/Framework/TMessageSerializer.h b/Framework/Core/include/Framework/TMessageSerializer.h index 769d23a7a3427..b6e355638c1e3 100644 --- a/Framework/Core/include/Framework/TMessageSerializer.h +++ b/Framework/Core/include/Framework/TMessageSerializer.h @@ -40,6 +40,8 @@ class FairOutputTBuffer : public TBufferFile : TBufferFile(TBuffer::kWrite, msg.GetSize() - sizeof(char*), embedInItself(msg), false, fairMQrealloc) { } + + ~FairOutputTBuffer() override; // Helper function to keep track of the FairMQ message that holds the data // in the data itself. We can use this to make sure the message can be reallocated // even if we simply have a pointer to the data. Hopefully ROOT will not play dirty @@ -60,6 +62,7 @@ class FairInputTBuffer : public TBufferFile : TBufferFile(TBuffer::kRead, size - sizeof(char*), data + sizeof(char*), false, nullptr) { } + ~FairInputTBuffer() override; }; struct TMessageSerializer { diff --git a/Framework/Core/src/TMessageSerializer.cxx b/Framework/Core/src/TMessageSerializer.cxx index c5da4cc576242..81a1c6e537d09 100644 --- a/Framework/Core/src/TMessageSerializer.cxx +++ b/Framework/Core/src/TMessageSerializer.cxx @@ -15,6 +15,9 @@ using namespace o2::framework; +FairOutputTBuffer::~FairOutputTBuffer() = default; +FairInputTBuffer::~FairInputTBuffer() = default; + void* FairOutputTBuffer::embedInItself(fair::mq::Message& msg) { // The first bytes of the message are used to store the pointer to the message itself From f4a478c778dc6b2672f71096f7915c10d18543d2 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 23:02:37 +0200 Subject: [PATCH 0355/1764] DPL: keep 
codechecker happy (#14270) --- Detectors/CTP/reconstruction/src/RawDataDecoder.cxx | 6 ++++-- Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index 74bd08ce943ee..b216f5ec54570 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -615,8 +615,9 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, continue; } mClassCountersA[i]++; - if (cls->descriptor == nullptr) + if (cls->descriptor == nullptr) { continue; + } uint64_t clsinpmask = cls->descriptor->getInputsMask(); uint64_t diginpmask = digit.CTPInputMask.to_ullong(); if (!((clsinpmask & diginpmask) == clsinpmask)) { @@ -632,8 +633,9 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, // if inps => class mask for (auto const& cls : mCTPConfig.getCTPClasses()) { // cls.printStream(std::cout); - if (cls.descriptor == nullptr) + if (cls.descriptor == nullptr) { continue; + } uint64_t clsinpmask = cls.descriptor->getInputsMask(); // class definition uint64_t diginpmask = digit.CTPInputMask.to_ullong(); uint64_t digclsmask = digit.CTPClassMask.to_ullong(); diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 3f7c729b351a3..2df6bc981ce44 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -71,8 +71,9 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) std::cout << std::endl; LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift(); LOG(info) << "Number of missing TF:" << nmiss << std::endl; - if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) + if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) { LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << 
mDecoder.getErrorTCR() << std::endl; + } std::array clsA = mDecoder.getClassCountersA(); std::array clsB = mDecoder.getClassCountersB(); std::array clsEA = mDecoder.getClassErrorsA(); @@ -80,8 +81,9 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0; - if (clsEA[i]) + if (clsEA[i]) { LOG(error) << " Class without inputs:"; + } LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i]; } } From 17345d60e5a63f085e86d1064c315d6f88c326b3 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 14 May 2025 19:29:47 +0200 Subject: [PATCH 0356/1764] DPL: Out of line NumericBuilders (#14273) --- Framework/Core/include/Framework/TableBuilder.h | 6 ++++++ Framework/Core/src/TableBuilder.cxx | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 936a8a04d5a5a..8d7601cefc634 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -48,6 +48,12 @@ struct BulkInfo { size_t size; }; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; + namespace o2::framework { namespace detail diff --git a/Framework/Core/src/TableBuilder.cxx b/Framework/Core/src/TableBuilder.cxx index eb19f8d3fe642..2169722efa9da 100644 --- a/Framework/Core/src/TableBuilder.cxx +++ b/Framework/Core/src/TableBuilder.cxx @@ -131,3 +131,9 @@ std::shared_ptr spawnerHelper(std::shared_ptr const& } } // namespace o2::framework + +template class arrow::NumericBuilder; +template class arrow::NumericBuilder; 
+template class arrow::NumericBuilder; +template class arrow::NumericBuilder; +template class arrow::NumericBuilder; From f44f2362e789b6e3a43214b5a0f48ba9c40838f7 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Wed, 14 May 2025 14:32:37 +0200 Subject: [PATCH 0357/1764] Update RecoContainer.cxx --- .../Detectors/GlobalTracking/src/RecoContainer.cxx | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index d4b4e2b89cbb0..dd206ffe3b70d 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -1440,8 +1440,7 @@ RecoContainer::GlobalIDSet RecoContainer::getSingleDetectorRefs(GTrackID gidx) c table[GTrackID::TRD] = parent0.getTrackRef(); // there is no standalone TRD track, so use the index for the ITSTPCTRD track array } else if (src == GTrackID::TPCTRDTOF) { const auto& parent0 = getTOFMatch(gidx); // TPCTRD : TOF - const auto& parent1 = getITSTPCTRDTrack(parent0.getTrackRef()); - const auto& parent2 = getTPCITSTrack(parent1.getRefGlobalTrackId()); + const auto& parent1 = getTPCTRDTrack(parent0.getTrackRef()); table[GTrackID::TPCTRD] = parent0.getTrackRef(); table[GTrackID::TPC] = parent1.getRefGlobalTrackId(); table[GTrackID::TOF] = {unsigned(parent0.getIdxTOFCl()), GTrackID::TOF}; @@ -1547,8 +1546,6 @@ const o2::dataformats::MCTruthContainer* RecoContainer::getE void RecoContainer::getTrackTimeITSTPCTRDTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getITSTPCTRDTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC-TRD track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } @@ -1557,8 +1554,6 @@ 
void RecoContainer::getTrackTimeITSTPCTRDTOF(GTrackID gid, float& t, float& tErr void RecoContainer::getTrackTimeTPCTRDTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getTPCTRDTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC-TRD track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } @@ -1567,8 +1562,6 @@ void RecoContainer::getTrackTimeTPCTRDTOF(GTrackID gid, float& t, float& tErr) c void RecoContainer::getTrackTimeITSTPCTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getITSTPCTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } From 07096be128091de462d688c88e5f4cf0f5866729 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 15:56:55 +0200 Subject: [PATCH 0358/1764] GPU: Add some more type trait templates for GPU code --- GPU/Common/GPUCommonTypeTraits.h | 44 +++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index 6d72565d1f1fb..f837019c11875 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -22,7 +22,7 @@ #include #endif #else -// We just reimplement some type traits in std for the GPU +// We just reimplement some type traits in std for the GPU // TODO: Check if meanwhile we can get rid of GPUCommonTypeTraits and GPUCommonArray, and just use the std headers. 
namespace std { template @@ -35,6 +35,7 @@ struct conditional { }; template using contitional_t = typename conditional::type; + template struct is_same { static constexpr bool value = false; @@ -45,6 +46,7 @@ struct is_same { }; template static constexpr bool is_same_v = is_same::value; + template struct enable_if { }; @@ -52,6 +54,7 @@ template struct enable_if { typedef T type; }; + template struct remove_cv { typedef T type; @@ -68,6 +71,9 @@ template struct remove_cv { typedef T type; }; +template +using remove_cv_t = typename remove_cv::type; + template struct remove_const { typedef T type; @@ -76,6 +82,9 @@ template struct remove_const { typedef T type; }; +template +using remove_const_t = typename remove_const::type; + template struct remove_volatile { typedef T type; @@ -84,6 +93,9 @@ template struct remove_volatile { typedef T type; }; +template +using remove_volatile_t = typename remove_volatile::type; + template struct is_pointer_t { static constexpr bool value = false; @@ -95,6 +107,36 @@ struct is_pointer_t { template struct is_pointer : is_pointer_t::type> { }; + +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; +template +using remove_reference_t = typename remove_reference::type; + +template +struct is_member_pointer_helper { + static constexpr bool value = false; +}; +template +struct is_member_pointer_helper { + static constexpr bool value = true; +}; +template +struct is_member_pointer : is_member_pointer_helper::type> { +}; +template +static constexpr bool is_member_pointer_v = is_member_pointer::value; + } // namespace std #endif From 4654958fe006df87ce60aaf48d61184db85e76d3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 15:57:10 +0200 Subject: [PATCH 0359/1764] Revert "GPU: Workaround for Clang Frontend issue" This reverts commit 89b35ba2d75113e60b2045ed01e169b28d860a07. 
--- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 4 ---- GPU/GPUTracking/Base/hip/CMakeLists.txt | 5 ----- 2 files changed, 9 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 67ad608c13417..acc77648d954b 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,11 +38,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + -#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + -#else - std::string("#define GPUCA_RTC_CONSTEXPR\n") + -#endif GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index c89ef1769ad81..6eded3499e46e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -270,8 +270,3 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() - -set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx" -TARGET_DIRECTORY O2::GPUTrackingHIP -PROPERTIES -COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR") From 46ef93fdb9436f1b1bcebd01a3458235ed918c80 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 18:52:29 +0200 
Subject: [PATCH 0360/1764] GPU: Template workaround to get static constexpr values as constexpr from references --- GPU/GPUTracking/Definitions/GPUGetConstexpr.h | 67 +++++++++++++++++++ GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 5 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 11 +-- 3 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 GPU/GPUTracking/Definitions/GPUGetConstexpr.h diff --git a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h new file mode 100644 index 0000000000000..8001b4e98c83f --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h @@ -0,0 +1,67 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUGetConstexpr.h +/// \author David Rohr + +#ifndef GPUGETCONSTEXPR_H +#define GPUGETCONSTEXPR_H + +#include "GPUCommonDef.h" +#include "GPUCommonTypeTraits.h" + +// This is a temporary workaround required for clang (with c++20), until we can go to C++23 with P2280R4, which allows getting constexpr static values from references + +#if defined(__clang__) && __cplusplus >= 202002L && __cplusplus < 202302L + +namespace o2::gpu::internal +{ + +#define GPUCA_GET_CONSTEXPR(obj, val) ( \ + std::is_member_pointer_v::val)> ? 
o2::gpu::internal::getConstexpr(&std::remove_reference_t::val, o2::gpu::internal::getConstexprHelper::val), decltype(&obj)>(&obj).value) : o2::gpu::internal::getConstexpr(&std::remove_reference_t::val, o2::gpu::internal::getConstexprHelper::val), decltype(&obj)>().value)) + +template +struct getConstexprHelper; + +template + requires(!std::is_member_pointer_v) +struct getConstexprHelper { + GPUdi() constexpr getConstexprHelper(const void* = nullptr) {} + static constexpr const void* value = nullptr; +}; + +template + requires(std::is_member_pointer_v) +struct getConstexprHelper { + GPUdi() constexpr getConstexprHelper(const S& v) : value(v) {} + GPUdDefault() constexpr getConstexprHelper() = default; + const S value = nullptr; +}; + +GPUdi() constexpr auto getConstexpr(const auto* v, const void* = nullptr) +{ + return *v; +} + +GPUdi() constexpr auto getConstexpr(const auto v, const auto w) +{ + return w->*v; +} + +} // namespace o2::gpu::internal + +#else // __clang__ + +#define GPUCA_GET_CONSTEXPR(obj, val) (obj).val + +#endif + +#endif // GPUGETCONSTEXPR_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index eb22ca49e9242..7bb28a9f22e31 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -20,6 +20,7 @@ #include "DataFormatsTPC/PIDResponse.h" #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" +#include "GPUGetConstexpr.h" #ifndef GPUCA_GPUCODE #include "SimulationDataFormat/ConstMCTruthContainer.h" @@ -141,10 +142,10 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled) { oTrack.setdEdx(tracksdEdx[i]); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != 
param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { oTrack.setdEdxAlt(tracksdEdxAlt[i]); } else { oTrack.setdEdxAlt(tracksdEdx[i]); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 4b616fce83f5f..1072e4b178bdf 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -39,6 +39,7 @@ #include "GPUTPCConvertImpl.h" #include "GPUTPCGMMergerTypes.h" #include "GPUParam.inc" +#include "GPUGetConstexpr.h" #ifdef GPUCA_CADEBUG_ENABLED #include "../utils/qconfig.h" @@ -216,12 +217,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { dEdxAlt.fillSubThreshold(lastRow - wayDirection); } } @@ -371,7 +372,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, 
prop.GetAlpha()); } - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; if (acc || accAlt) { @@ -395,7 +396,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (acc) { dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { if (accAlt) { dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } @@ -436,7 +437,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { dEdxAlt.computedEdx(merger->MergedTracksdEdxAlt()[iTrk], param); } } From 4d647840509e57b890f4ce71fdb062f2edf7b234 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 18:52:48 +0200 Subject: [PATCH 0361/1764] GPU: Simplify some type_traits use, get rid of ::values and ::type --- .../Base/cuda/GPUReconstructionCUDAInternals.h | 2 +- 
GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 ++-- GPU/GPUTracking/qa/GPUQAHelper.h | 8 ++++---- GPU/GPUTracking/utils/bitfield.h | 2 +- GPU/GPUTracking/utils/qconfig.cxx | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 0813c9d22ea09..493c09e448e5e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -79,7 +79,7 @@ class GPUDebugTiming bool mDo; }; -static_assert(std::is_convertible::value, "CUDA event type incompatible to deviceEvent"); +static_assert(std::is_convertible_v, "CUDA event type incompatible to deviceEvent"); } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 949dd6195b262..ce05e159461e5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -18,7 +18,7 @@ #include -static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); +static_assert(std::is_convertible_v, "OpenCL event type incompatible to deviceEvent"); #define GPUErrorReturn(...) 
\ { \ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f1a0816529c3a..73ca449252d1d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -235,7 +235,7 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const { GPUTPCGMSectorTrack* sectorTrack = nullptr; int32_t nClusters = 0; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { sectorTrack = &mSectorTrackInfos[trk.TrackID()]; nClusters = sectorTrack->OrigTrack()->NHits(); } else { @@ -244,7 +244,7 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const auto acc = GPUTPCTrkLbl(resolveMCLabels(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f); for (int32_t i = 0; i < nClusters; i++) { int32_t id; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()]; const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i]; id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0]; diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index a7811c6fd55ed..a830562119467 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -48,7 +48,7 @@ class GPUTPCTrkLbl } inline void addLabel(uint32_t elementId) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { for (uint32_t i = 0; i < sizeof(mClusterLabels[elementId]) / sizeof(mClusterLabels[elementId].fClusterID[0]); i++) { const auto& element = mClusterLabels[elementId].fClusterID[i]; if (element.fMCID >= 0) { @@ -101,7 +101,7 @@ class GPUTPCTrkLbl } } auto& bestLabel = mLabels[bestLabelNum].first; - if constexpr 
(std::is_same::value && WEIGHT) { + if constexpr (std::is_same_v && WEIGHT) { *labelWeight = bestLabel.fWeight; *totalWeight = mTotalWeight; *maxCount = bestLabelCount; @@ -147,7 +147,7 @@ struct GPUTPCTrkLbl_ret { template class S, typename... Args> static inline auto GPUTPCTrkLbl(const S* x, Args... args) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { return internal::GPUTPCTrkLbl>(x, args...); } else { return internal::GPUTPCTrkLbl, U>(x, args...); @@ -159,7 +159,7 @@ static inline auto GPUTPCTrkLbl(const AliHLTTPCClusterMCLabel* x, Args... args) { using S = AliHLTTPCClusterMCLabel; using T = AliHLTTPCClusterMCWeight; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { return internal::GPUTPCTrkLbl(x, args...); } else { return internal::GPUTPCTrkLbl(x, args...); diff --git a/GPU/GPUTracking/utils/bitfield.h b/GPU/GPUTracking/utils/bitfield.h index 9730f6c6c234f..a3a3ac9a5bd95 100644 --- a/GPU/GPUTracking/utils/bitfield.h +++ b/GPU/GPUTracking/utils/bitfield.h @@ -93,7 +93,7 @@ class bitfield } #if !defined(GPUCA_GPUCODE_DEVICE) - static_assert(std::is_integral::value, "Storage type non integral"); + static_assert(std::is_integral_v, "Storage type non integral"); static_assert(sizeof(S) >= sizeof(T), "Storage type has insufficient capacity"); #endif diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cdb41ec5813f2..839954e52ded3 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -126,7 +126,7 @@ static inline int32_t qAddOptionMainTupleElem(qConfigSettings settings = settingsTup; return (qAddOptionType(settings, ref, i, argv, argc, def)); } -template ::value> +template > struct qAddOptionMainTupleStruct { static inline int32_t qAddOptionMainTuple(qConfigSettings::settingsType> settings, T& tup, int32_t& i, const char** argv, const int argc) { @@ -157,13 +157,13 @@ struct qConfigType { // Recursive handling of additional settings static 
inline void qProcessSetting(qConfigSettings& settings, qmin_t minval) { - static_assert(!std::is_same::value, "min option not supported for boolean settings"); + static_assert(!std::is_same_v, "min option not supported for boolean settings"); settings.checkMin = true; settings.min = minval.v; } static inline void qProcessSetting(qConfigSettings& settings, qmax_t maxval) { - static_assert(!std::is_same::value, "max option not supported for boolean settings"); + static_assert(!std::is_same_v, "max option not supported for boolean settings"); settings.checkMax = true; settings.max = maxval.v; } @@ -244,7 +244,7 @@ struct qConfigType { static inline void qConfigHelpOption(const char* name, const char* type, const char* def, const char* optname, char optnameshort, const char* preopt, char preoptshort, int32_t optionType, const char* help, Args&&... args) { auto settings = qConfigGetSettings(args...); - const bool boolType = optionType != 1 && std::is_same::value; + const bool boolType = optionType != 1 && std::is_same_v; const char* arguments = settings.doSet ? " (" : (settings.doDefault || optionType == 1 || boolType) ? " [arg] (" : optionType == 2 ? " [...] 
(" : " arg ("; char argBuffer[4] = {0}; uint32_t argBufferPos = 0; From 073cd1697027762311775ec251cea232c701db80 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 14 May 2025 20:43:33 +0200 Subject: [PATCH 0362/1764] GPU CMake: Use CUDA/HIP compilers to preprocess CUDA/HIP RTC files --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index c31dd0c8d3fe2..d9ee132d7c5f5 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -68,7 +68,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) add_custom_command( OUTPUT ${GPU_RTC_BIN}.src COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_CUDA_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -E -Xcompiler "-nostdinc -P" ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} COMMAND_EXPAND_LISTS diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 6eded3499e46e..a47c659c8717d 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -115,7 +115,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) add_custom_command( OUTPUT ${GPU_RTC_BIN}.src COMMAND cp ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x 
c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} DEPENDS ${MODULE}_HIPIFIED From 895906ee43086663b84232a35e935474a06d6cb7 Mon Sep 17 00:00:00 2001 From: Mario Sitta Date: Thu, 15 May 2025 15:41:14 +0200 Subject: [PATCH 0363/1764] Implementation of MFT rails inside Cage --- .../simulation/include/ITSSimulation/V3Cage.h | 21 +++ .../ITSMFT/ITS/simulation/src/V3Cage.cxx | 146 ++++++++++++++++++ 2 files changed, 167 insertions(+) diff --git a/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h b/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h index e90f0cfeb0aed..44d72284112c2 100644 --- a/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h +++ b/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h @@ -113,6 +113,16 @@ class V3Cage : public V11Geometry /// \param mgr The GeoManager (used only to get the proper material) TGeoVolume* createCageClosingCross(const TGeoManager* mgr = gGeoManager); + /// Creates and places the MFT rails inside the Cage + /// \param mother The mother volume to place the rails into + /// \param mgr The GeoManager (used only to get the proper material) + void createAndPlaceMFTRailsInsideCage(TGeoVolume* mother, const TGeoManager* mgr = gGeoManager); + + /// Creates a pair of MFT rails inside the Cage + /// \param motmed Medium material of the mother volume + /// \param mgr The GeoManager (used only to get the proper material) + TGeoVolume* createMFTRailsPair(const TGeoMedium* motmed, const TGeoManager* mgr = gGeoManager); + // Parameters static const Double_t sCageYInBarrel; ///< Global Y translation @@ -244,6 +254,17 @@ class V3Cage : public V11Geometry static const Double_t sCageCrossBarThick; ///< Closing cross bar thickness 
static const Double_t sCageCrossBarPhi; ///< Closing cross bar angle + // MFT Rails inside the Cage + static const Double_t sCageMFTRailZLen; ///< Total length of the rail + static const Double_t sCageMFTRailTotWidth; ///< Total width of the rail + static const Double_t sCageMFTRailExtWidth; ///< Width of the external part + static const Double_t sCageMFTRailIntWidth; ///< Width of the internal part + static const Double_t sCageMFTRailBaseWidth; ///< Width of the rail base + static const Double_t sCageMFTRailTotHeight; ///< Total height of the rail + static const Double_t sCageMFTRailExtHeight; ///< Height of the external part + static const Double_t sCageMFTRailIntHeight; ///< Height of the internal part + static const Double_t sCageMFTRailsXDist; ///< X distance between rails + ClassDefOverride(V3Cage, 0); // ITS v3 support geometry }; } // namespace its diff --git a/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx b/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx index 3b17d7afeef3d..528b74dc339f1 100644 --- a/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx +++ b/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx @@ -167,6 +167,16 @@ const Double_t V3Cage::sCageCrossZLength = 8 * sMm; const Double_t V3Cage::sCageCrossBarThick = 20 * sMm; const Double_t V3Cage::sCageCrossBarPhi = 25; // Deg +const Double_t V3Cage::sCageMFTRailZLen = 1807 * sMm; +const Double_t V3Cage::sCageMFTRailTotWidth = 27 * sMm; +const Double_t V3Cage::sCageMFTRailExtWidth = 24 * sMm; +const Double_t V3Cage::sCageMFTRailIntWidth = 17.5 * sMm; +const Double_t V3Cage::sCageMFTRailBaseWidth = 22 * sMm; +const Double_t V3Cage::sCageMFTRailTotHeight = 8.9 * sMm; +const Double_t V3Cage::sCageMFTRailExtHeight = 5.9 * sMm; +const Double_t V3Cage::sCageMFTRailIntHeight = 3.5 * sMm; +const Double_t V3Cage::sCageMFTRailsXDist = 44 * sMm; + ClassImp(V3Cage); V3Cage::V3Cage() @@ -251,6 +261,9 @@ void V3Cage::createAndPlaceCage(TGeoVolume* mother, const TGeoManager* mgr) zpos = sBPSuppZPos + sBPSuppCollarBeamWid 
/ 2; mother->AddNode(cageBPSupport, 1, new TGeoTranslation(0, ypos, zpos)); + // The MFT Rails inside the Cage + createAndPlaceMFTRailsInsideCage(mother, mgr); + return; } @@ -1648,3 +1661,136 @@ TGeoVolume* V3Cage::createCageClosingCross(const TGeoManager* mgr) // Finally return the closing cross volume return closCrossVol; } + +void V3Cage::createAndPlaceMFTRailsInsideCage(TGeoVolume* mother, const TGeoManager* mgr) +{ + // + // Creates the MFT Rails located inside the Cage and place them + // + // Input: + // motmat : the material of the mother volume (for the container box) + // mgr : the GeoManager (used only to get the proper material) + // + // Output: + // + // Return: + // + // Created: 10 May 2025 Mario Sitta + // + + // Local variables + Double_t rdist, rpos, xpos, ypos, alpha; + Double_t xbox, ybox; + + // Create a pair of rails (a BBox container is returned) + TGeoVolume* cageMFTRails = createMFTRailsPair(mother->GetMedium(), mgr); + + // Now compute the radial distance and the XY position of the box + xbox = (static_cast(cageMFTRails->GetShape()))->GetDX(); + ybox = (static_cast(cageMFTRails->GetShape()))->GetDY(); + + rdist = TMath::Sqrt(sCageCoverRint * sCageCoverRint - xbox * xbox); + rpos = rdist - ybox; + + // Finally place the four pairs of rails inside the mother volume + xpos = rpos * TMath::Sin(sCageEndCapCableCutPhi * TMath::DegToRad()); + ypos = rpos * TMath::Cos(sCageEndCapCableCutPhi * TMath::DegToRad()) + sCageYInBarrel; + + alpha = -sCageEndCapCableCutPhi + 180; + mother->AddNode(cageMFTRails, 1, new TGeoCombiTrans(xpos, ypos, 0, new TGeoRotation("", alpha, 0, 0))); + alpha = sCageEndCapCableCutPhi + 180; + mother->AddNode(cageMFTRails, 2, new TGeoCombiTrans(-xpos, ypos, 0, new TGeoRotation("", alpha, 0, 0))); + + ypos = rpos * TMath::Cos(sCageEndCapCableCutPhi * TMath::DegToRad()) - sCageYInBarrel; + + alpha = sCageEndCapCableCutPhi; + mother->AddNode(cageMFTRails, 3, new TGeoCombiTrans(xpos, -ypos, 0, new TGeoRotation("", alpha, 0, 
0))); + alpha = -sCageEndCapCableCutPhi; + mother->AddNode(cageMFTRails, 4, new TGeoCombiTrans(-xpos, -ypos, 0, new TGeoRotation("", alpha, 0, 0))); + + return; +} + +TGeoVolume* V3Cage::createMFTRailsPair(const TGeoMedium* motmed, const TGeoManager* mgr) +{ + // + // Creates a pair of MFT Rails located inside the Cage (from drawings + // ALI-MFT-DF-0057 and elements therein) + // A box containing a pair of rails is returned (a physical box + // is preferred over an Assembly for better performance) + // + // Input: + // motmat : the material of the mother volume (for the container box) + // mgr : the GeoManager (used only to get the proper material) + // + // Output: + // + // Return: + // A rail pair as a TGeoVolume + // + // Created: 10 May 2025 Mario Sitta + // + + // Local variables + const Int_t nv = 16; + Double_t xv[nv], yv[nv]; + Double_t deltah, xlen, ylen, zlen; + Double_t xpos, ypos; + + // The shape of a single rail: a Xtru + xv[0] = sCageMFTRailBaseWidth / 2; + yv[0] = 0.; + xv[1] = xv[0]; + yv[1] = sCageMFTRailTotHeight - sCageMFTRailExtHeight; + xv[2] = sCageMFTRailTotWidth / 2; + yv[2] = yv[1]; + xv[3] = xv[2]; + yv[3] = sCageMFTRailTotHeight; + xv[4] = sCageMFTRailIntWidth / 2; + yv[4] = yv[3]; + deltah = (sCageMFTRailExtHeight - sCageMFTRailIntHeight) / 2; + xv[5] = xv[4]; + yv[5] = yv[4] - deltah; + xv[6] = sCageMFTRailExtWidth / 2; + yv[6] = yv[5]; + xv[7] = xv[6]; + yv[7] = yv[6] - sCageMFTRailIntHeight; + + for (Int_t i = 8; i < nv; i++) { + xv[i] = -xv[15 - i]; + yv[i] = yv[15 - i]; + } + + zlen = sCageMFTRailZLen / 2; + + TGeoXtru* mftRailSh = new TGeoXtru(2); + mftRailSh->SetName("mftrailshape"); + mftRailSh->DefinePolygon(nv, xv, yv); + mftRailSh->DefineSection(0, -zlen); + mftRailSh->DefineSection(1, zlen); + + // The air container: a BBox + xlen = 2 * sCageMFTRailTotWidth + sCageMFTRailsXDist; + ylen = sCageMFTRailTotHeight / 2; + zlen = sCageMFTRailZLen / 2; + TGeoBBox* mftRailBoxSh = new TGeoBBox(xlen / 2, ylen, zlen); + + // We have 
the shape: now create the real volume + TGeoMedium* medAl = mgr->GetMedium(Form("%s_ALUMINUM$", GetDetName())); + + TGeoVolume* mftRailVol = new TGeoVolume("MFTRailInsideCage", mftRailSh, medAl); + mftRailVol->SetFillColor(kGray); + mftRailVol->SetLineColor(kGray); + + TGeoVolume* mftRailBoxVol = new TGeoVolume("MFTRailPairInsideCage", mftRailBoxSh, motmed); + + // Put the two rails inside the holding box + // (rail Y origin is on its lower face) + xpos = mftRailBoxSh->GetDX() - 0.5 * sCageMFTRailTotWidth; + ypos = mftRailBoxSh->GetDY(); + mftRailBoxVol->AddNode(mftRailVol, 1, new TGeoTranslation(xpos, -ypos, 0)); + mftRailBoxVol->AddNode(mftRailVol, 2, new TGeoTranslation(-xpos, -ypos, 0)); + + // Finally return the rails volume + return mftRailBoxVol; +} From c2cd436aaed5b1e0e21ba831f22b37b7184cd9b3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:32:10 +0200 Subject: [PATCH 0364/1764] GPU: Some work to prepare using and headers from system for GPU --- .../MathUtils/include/MathUtils/SMatrixGPU.h | 4 ++- .../include/CommonDataFormat/AbstractRef.h | 5 +++- Detectors/Raw/include/DetectorsRaw/RDHUtils.h | 6 +++-- GPU/GPUTracking/Base/GPUStdSystemHeaders.h | 25 +++++++++++++++++++ GPU/GPUTracking/Base/cuda/CMakeLists.txt | 4 ++- .../GPUReconstructionCUDAIncludesSystem.h | 7 +++--- GPU/GPUTracking/Base/hip/CMakeLists.txt | 3 ++- .../hip/GPUReconstructionHIPIncludesSystem.h | 4 +++ GPU/GPUTracking/Base/opencl/CMakeLists.txt | 8 +++++- .../Base/opencl/GPUReconstructionOCL.cl | 2 ++ GPU/GPUTracking/DataTypes/GPUDataTypes.h | 2 +- GPU/GPUTracking/Definitions/GPUGetConstexpr.h | 4 ++- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 5 +++- GPU/GPUTracking/Standalone/cmake/config.cmake | 2 +- .../TRDTracking/GPUTRDTrackerKernels.cxx | 5 +++- GPU/GPUTracking/utils/bitfield.h | 5 +--- 16 files changed, 72 insertions(+), 19 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUStdSystemHeaders.h diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h 
b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 5ecdcd75a9906..675719cfc0751 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -29,7 +29,9 @@ #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" #include "GPUCommonLogger.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif namespace o2::math_utils::detail { diff --git a/DataFormats/common/include/CommonDataFormat/AbstractRef.h b/DataFormats/common/include/CommonDataFormat/AbstractRef.h index 403bab3cbd62f..72c195cfb7bc8 100644 --- a/DataFormats/common/include/CommonDataFormat/AbstractRef.h +++ b/DataFormats/common/include/CommonDataFormat/AbstractRef.h @@ -18,7 +18,10 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif + namespace o2::dataformats { diff --git a/Detectors/Raw/include/DetectorsRaw/RDHUtils.h b/Detectors/Raw/include/DetectorsRaw/RDHUtils.h index 2fac6f35d40c4..a5d8cc8615c79 100644 --- a/Detectors/Raw/include/DetectorsRaw/RDHUtils.h +++ b/Detectors/Raw/include/DetectorsRaw/RDHUtils.h @@ -19,13 +19,15 @@ #include "GPUCommonRtypes.h" #include "Headers/RAWDataHeader.h" #include "Headers/RDHAny.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif #if !defined(GPUCA_GPUCODE) #include "CommonDataFormat/InteractionRecord.h" #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) #include "Headers/DAQID.h" -#endif // GPUCA_GPUCODE / GPUCA_STANDALONE +#endif namespace o2 { diff --git a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h new file mode 100644 index 0000000000000..6598085d309c7 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h @@ -0,0 +1,25 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUStdSystemHeaders.h +/// \author David Rohr + +#ifndef GPUSTDSYSTEMHEADERS_H +#define GPUSTDSYSTEMHEADERS_H + +#include +#include +#include +#include +#include +#include + +#endif diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index d9ee132d7c5f5..97972265b3007 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -67,7 +67,9 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) # cmake-format: off add_custom_command( OUTPUT ${GPU_RTC_BIN}.src - COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h ${GPU_RTC_BIN}.src + COMMAND cp ${GPUDIR}/Base/GPUStdSystemHeaders.h ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h | grep -v GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src COMMAND ${CMAKE_CUDA_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -E -Xcompiler "-nostdinc -P" ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h index 3f072059a9ad7..263d6939909c8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h @@ -15,9 +15,10 @@ #ifndef 
O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H #define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H -#include -#include -#include +#ifndef GPUCA_GPUCODE_GENRTC +#include "GPUStdSystemHeaders.h" +#endif + #include #include #include diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index a47c659c8717d..04a65922ad453 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -114,7 +114,8 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) # cmake-format: off add_custom_command( OUTPUT ${GPU_RTC_BIN}.src - COMMAND cp ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h ${GPU_RTC_BIN}.src + COMMAND cp ${GPUDIR}/Base/GPUStdSystemHeaders.h ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h | grep -v GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src COMMAND ${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h index 1a3a1ff0108af..0228f993aaee3 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h @@ -15,6 +15,10 @@ #ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H #define O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H +#ifndef GPUCA_GPUCODE_GENRTC +#include "GPUStdSystemHeaders.h" +#endif + #include #include #include diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 99ec36615a1d1..1a8a739adbecf 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -23,7 +23,7 @@ endif() set(CL_SRC 
${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl) set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode) -set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS}) +set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -x cl -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS}) if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(OCL_FLAGS ${OCL_FLAGS} -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() @@ -43,6 +43,11 @@ if (NOT DEFINED GPUCA_OCL_SPIRV_VERSION) set(GPUCA_OCL_SPIRV_VERSION 1.2) endif() +# execute_process(COMMAND bash -c "${LLVM_CLANG} -stdlib=libc++ -E -H -x c++ - <<< '#include ' 2>&1 1>/dev/null | grep type_traits | head -n 1 | sed 's/^\\.* *//'" +# OUTPUT_VARIABLE CLANG_STD_INCLUDE_DIR) +# get_filename_component(CLANG_STD_INCLUDE_DIR "${CLANG_STD_INCLUDE_DIR}" DIRECTORY) +# get_filename_component(CLANG_STD_INCLUDE_DIR "${CLANG_STD_INCLUDE_DIR}" ABSOLUTE) # TODO: For using in OpenCL, we would need to add -I${CLANG_STD_INCLUDE_DIR} + if(OPENCL_ENABLED_SPIRV) # BUILD OpenCL intermediate code for SPIR-V target # executes clang to create llvm IL code # Add -fintegrated-objemitter once we switch to clang >= 17 @@ -71,6 +76,7 @@ if(OPENCL_ENABLED) # BUILD OpenCL source code for runtime compilation target add_custom_command( OUTPUT ${CL_BIN}.src COMMAND ${LLVM_CLANG} + -target spir64 -Wno-unused-command-line-argument ${OCL_FLAGS} ${OCL_DEFINECL} diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index ffdc34d6c9881..3f58c0fea75e9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -70,6 +70,8 @@ typedef signed char int8_t; #endif #define assert(param) +#include "GPUCommonDef.h" +#include "GPUCommonTypeTraits.h" // TODO: Once possible in OpenCL, should use GPUStdSystemHeaders.h here #include "GPUConstantMem.h" #include 
"GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 6cc1e7266e722..3e9623e23559b 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -20,9 +20,9 @@ // These are basic and non-complex data types, which will also be visible on the GPU. // Please add complex data types required on the host but not GPU to GPUHostDataTypes.h and forward-declare! #ifndef GPUCA_GPUCODE_DEVICE +#include // for bitfield below #include #endif -#include "GPUCommonTypeTraits.h" #include "GPUTRDDef.h" struct AliHLTTPCClusterMCLabel; diff --git a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h index 8001b4e98c83f..b70890738e1b7 100644 --- a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h +++ b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h @@ -16,7 +16,9 @@ #define GPUGETCONSTEXPR_H #include "GPUCommonDef.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif // This is a temporary workaround required for clang (with c++20), until we can go to C++23 with P2280R4, which allows getting constexpr static values from references diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 502a70cb57762..a1993ec4a0ffa 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -28,7 +28,10 @@ #include "GPUCommonArray.h" #include "GPUParam.h" #include "GPUTrackParamConvert.h" -#include "GPUCommonTypeTraits.h" + +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif using namespace o2::gpu; using namespace o2::track; diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 1de0cfa27d7ee..0c7544aff7035 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -30,7 +30,7 @@ set(GPUCA_BUILD_DEBUG 0) 
set(GPUCA_BUILD_DEBUG_SANITIZE 0) set(GPUCA_DETERMINISTIC_MODE 0) # OFF / NO_FAST_MATH / OPTO2 / GPU / WHOLEO2 #set(GPUCA_CUDA_GCCBIN c++-14) -#set(GPUCA_OPENCL_CLANGBIN clang-19) +#set(GPUCA_OPENCL_CLANGBIN clang-20) set(HIP_AMDGPUTARGET "default") # "gfx906;gfx908;gfx90a" set(CUDA_COMPUTETARGET "default") # 86 89 #set(GPUCA_CUDA_COMPILE_MODE perkernel) # onefile / perkernel / rtc diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx index d18f04e554043..dea4cdbca430e 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx @@ -15,10 +15,13 @@ #include "GPUTRDTrackerKernels.h" #include "GPUTRDGeometry.h" #include "GPUConstantMem.h" -#include "GPUCommonTypeTraits.h" #include "GPUReconstructionThreading.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif + using namespace o2::gpu; template diff --git a/GPU/GPUTracking/utils/bitfield.h b/GPU/GPUTracking/utils/bitfield.h index a3a3ac9a5bd95..c5df80f1d6277 100644 --- a/GPU/GPUTracking/utils/bitfield.h +++ b/GPU/GPUTracking/utils/bitfield.h @@ -15,10 +15,6 @@ #ifndef Q_BITFIELD_H #define Q_BITFIELD_H -#if !defined(GPUCA_GPUCODE_DEVICE) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) -#include -#endif - template class bitfield { @@ -93,6 +89,7 @@ class bitfield } #if !defined(GPUCA_GPUCODE_DEVICE) + static_assert(std::is_void_v, "type_traits header missing"); static_assert(std::is_integral_v, "Storage type non integral"); static_assert(sizeof(S) >= sizeof(T), "Storage type has insufficient capacity"); #endif From a9e52c1e0175a24e673c74ec9bf2d8bf03017c0b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:34:58 +0200 Subject: [PATCH 0365/1764] GPU: Rename some misleading flag names --- GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 
GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 8 ++++---- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 6 +++--- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index 3c86dbfcd8d18..4ddd70efb5d1c 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -35,7 +35,7 @@ struct GPUTPCGMMergedTrackHit { flagRejectDistance = 0x20, flagRejectErr = 0x40, flagReject = 0x60, - flagNotFit = 0x80 }; + flagHighIncl = 0x80 }; }; struct GPUTPCGMMergedTrackHitXYZ { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 73ca449252d1d..1d5a7a0b1df47 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1895,7 +1895,7 @@ GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t uint8_t clusterState = mClusters[trk.FirstClusterRef() + j].state; if (!(clusterState & GPUTPCGMMergedTrackHit::flagReject)) { weight |= attachGood; - } else if (clusterState & GPUTPCGMMergedTrackHit::flagNotFit) { + } else if (clusterState & GPUTPCGMMergedTrackHit::flagHighIncl) { weight |= attachHighIncl; } if (mClusters[trk.FirstClusterRef() + j].leg == goodLeg) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 7bb28a9f22e31..74a8df388d163 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -32,7 +32,7 @@ using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; -GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit; } +GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | 
GPUTPCGMMergedTrackHit::flagHighIncl; } GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return rec.tpc.dropSecondaryLegsInOutput ? gputpcgmmergertypes::attachGoodLeg : gputpcgmmergertypes::attachZero; } namespace o2::gpu::internal diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index db7a3b5884a12..97b307ce7a550 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -53,9 +53,9 @@ class GPUTPCGMPropagator updateErrorFitFailed = -1, updateErrorClusterRejected = 2, updateErrorClusterRejectedDistance = 2, - updateErrorEdgeCluster = 3, - updateErrorClusterRejectedInInterpolation = 4, - updateErrorClusterRejectedInUpdate = 5 + updateErrorClusterRejectedInInterpolation = 3, + updateErrorClusterRejectedInUpdate = 4, + updateErrorClusterRejectedEdge = 5 }; enum RejectChi2Mode { rejectDirect = 1, @@ -188,7 +188,7 @@ class GPUTPCGMPropagator GPUTPCGMPhysicalTrackModel mT0; MaterialCorrection mMaterial; FieldRegion mFieldRegion = TPC; - bool mSeedingErrors = 0; + bool mSeedingErrors = 0; // TODO: Hide variable in Run3 mode bool mFitInProjections = 1; // fit (Y,SinPhi,QPt) and (Z,DzDs) paramteres separatelly bool mPropagateBzOnly = 0; // Use Bz only in propagation bool mToyMCEvents = 0; // events are simulated with simple home-made simulation diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 1072e4b178bdf..aed42e4f98f0c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -299,7 +299,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (mC[0] > param.rec.tpc.trackFitCovLimit || mC[2] > param.rec.tpc.trackFitCovLimit) { break; } - MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagNotFit); + MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, 
GPUTPCGMMergedTrackHit::flagHighIncl); nMissed2++; NTolerated++; CADEBUG(printf(" --- break (%d, %d)\n", err, err2)); @@ -334,7 +334,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ #endif GPUCA_DEBUG_STREAMER_CHECK(GPUTPCGMPropagator::DebugStreamerVals debugVals;); if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification - retVal = GPUTPCGMPropagator::updateErrorEdgeCluster; + retVal = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; } else { const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; const float invSqrtCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? CAMath::InvSqrt(merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; @@ -363,7 +363,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ lastUpdateX = mX; covYYUpd = mC[0]; nMissed = nMissed2 = 0; - UnmarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagNotFit); + UnmarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagHighIncl); N++; ihitStart = ihit; float dy = mP[0] - prop.Model().Y(); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index a1993ec4a0ffa..b74c1bb6a4534 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -268,7 +268,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov if constexpr (std::is_same_v) { const auto& hit = mPtrackHits[trkX.FirstClusterRef() + i]; cl = &mPclusterNative->clustersLinear[hit.num]; - if (hit.state & (GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit)) { + if (hit.state & 
(GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagHighIncl)) { cl = nullptr; if (i + direction != stop) { i += direction; From 132943deb4940ec93748ebbc419ccaa90ca29247 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:57:54 +0200 Subject: [PATCH 0366/1764] GPU: Use instead of GPUCommonArray for CUDA / ROCm, not yet for OpenCL... --- .../DCAFitter/include/DCAFitter/DCAFitterN.h | 68 +++++++++---------- .../MathUtils/include/MathUtils/SMatrixGPU.h | 8 +-- .../include/MathUtils/detail/basicMath.h | 11 +-- .../include/MathUtils/detail/trigonometric.h | 13 ++-- .../ITS/include/DataFormatsITS/TrackITS.h | 8 +-- .../include/ReconstructionDataFormats/DCA.h | 4 +- .../TrackParametrization.h | 10 +-- .../TrackParametrizationWithError.h | 8 +-- .../ReconstructionDataFormats/TrackUtils.h | 6 +- .../ReconstructionDataFormats/Vertex.h | 12 ++-- .../src/TrackParametrization.cxx | 6 +- .../src/TrackParametrizationWithError.cxx | 10 +-- Detectors/AOD/src/AODProducerWorkflowSpec.cxx | 2 +- Detectors/Align/src/AlignableDetectorTPC.cxx | 4 +- .../Base/include/DetectorsBase/Propagator.h | 6 +- Detectors/Base/src/Propagator.cxx | 8 +-- .../postprocessing/studies/src/Efficiency.cxx | 12 ++-- .../studies/src/ImpactParameter.cxx | 2 +- .../GPU/ITStrackingGPU/VertexingKernels.h | 4 +- .../ITS/tracking/GPU/cuda/VertexingKernels.cu | 4 +- .../tracking/include/ITStracking/Cluster.h | 15 ++-- .../tracking/include/ITStracking/Constants.h | 28 ++++---- Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx | 8 ++- Detectors/TPC/qc/src/Tracks.cxx | 6 +- .../TPC/workflow/src/MIPTrackFilterSpec.cxx | 2 +- Detectors/TPC/workflow/src/TPCRefitter.cxx | 2 +- .../TPC/workflow/src/TPCTimeSeriesSpec.cxx | 10 +-- GPU/Common/GPUCommonArray.h | 12 ++-- GPU/Common/GPUCommonTypeTraits.h | 3 +- GPU/GPUTracking/Base/GPUStdSystemHeaders.h | 1 + .../Base/opencl/GPUReconstructionOCL.cl | 1 + GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 4 +- .../TRDTracking/GPUTRDInterfaces.h | 8 +-- 33 files 
changed, 156 insertions(+), 150 deletions(-) diff --git a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h index 569b3ea49e515..aac451f1f8978 100644 --- a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h +++ b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h @@ -110,11 +110,11 @@ class DCAFitterN using MatSymND = o2::math_utils::SMatrix>; using MatStdND = o2::math_utils::SMatrix>; using TrackCoefVtx = MatStd3D; - using ArrTrack = o2::gpu::gpustd::array; // container for prongs (tracks) at single vertex cand. - using ArrTrackCovI = o2::gpu::gpustd::array; // container for inv.cov.matrices at single vertex cand. - using ArrTrCoef = o2::gpu::gpustd::array; // container of TrackCoefVtx coefficients at single vertex cand. - using ArrTrDer = o2::gpu::gpustd::array; // container of Track 1st and 2nd derivative over their X param - using ArrTrPos = o2::gpu::gpustd::array; // container of Track positions + using ArrTrack = std::array; // container for prongs (tracks) at single vertex cand. + using ArrTrackCovI = std::array; // container for inv.cov.matrices at single vertex cand. + using ArrTrCoef = std::array; // container of TrackCoefVtx coefficients at single vertex cand. + using ArrTrDer = std::array; // container of Track 1st and 2nd derivative over their X param + using ArrTrPos = std::array; // container of Track positions public: enum BadCovPolicy : uint8_t { // if encountering non-positive defined cov. 
matrix, the choice is: @@ -158,7 +158,7 @@ class DCAFitterN GPUd() const auto getPCACandidatePos(int cand = 0) const { const auto& vd = mPCA[mOrder[cand]]; - return o2::gpu::gpustd::array{static_cast(vd[0]), static_cast(vd[1]), static_cast(vd[2])}; + return std::array{static_cast(vd[0]), static_cast(vd[1]), static_cast(vd[2])}; } ///< return position of quality-ordered candidate in the internal structures @@ -213,7 +213,7 @@ class DCAFitterN GPUd() MatSym3D calcPCACovMatrix(int cand = 0) const; - o2::gpu::gpustd::array calcPCACovMatrixFlat(int cand = 0) const + std::array calcPCACovMatrixFlat(int cand = 0) const { auto m = calcPCACovMatrix(cand); return {static_cast(m(0, 0)), static_cast(m(1, 0)), static_cast(m(1, 1)), static_cast(m(2, 0)), static_cast(m(2, 1)), static_cast(m(2, 2))}; @@ -364,39 +364,39 @@ class DCAFitterN private: // vectors of 1st derivatives of track local residuals over X parameters - o2::gpu::gpustd::array, N> mDResidDx; + std::array, N> mDResidDx; // vectors of 1nd derivatives of track local residuals over X parameters // (cross-derivatives DR/(dx_j*dx_k) = 0 for j!=k, therefore the hessian is diagonal) - o2::gpu::gpustd::array, N> mD2ResidDx2; + std::array, N> mD2ResidDx2; VecND mDChi2Dx; // 1st derivatives of chi2 over tracks X params MatSymND mD2Chi2Dx2; // 2nd derivatives of chi2 over tracks X params (symmetric matrix) MatSymND mCosDif; // matrix with cos(alp_j-alp_i) for j mOrigTrPtr; - o2::gpu::gpustd::array mTrAux; // Aux track info for each track at each cand. vertex - CrossInfo mCrossings; // info on track crossing - - o2::gpu::gpustd::array mTrcEInv; // errors for each track at each cand. vertex - o2::gpu::gpustd::array mCandTr; // tracks at each cond. vertex (Note: Errors are at seed XY point) - o2::gpu::gpustd::array mTrCFVT; // TrackCoefVtx for each track at each cand. 
vertex - o2::gpu::gpustd::array mTrDer; // Track derivativse - o2::gpu::gpustd::array mTrPos; // Track positions - o2::gpu::gpustd::array mTrRes; // Track residuals - o2::gpu::gpustd::array mPCA; // PCA for each vertex candidate - o2::gpu::gpustd::array mChi2 = {0}; // Chi2 at PCA candidate - o2::gpu::gpustd::array mNIters; // number of iterations for each seed - o2::gpu::gpustd::array mTrPropDone{}; // Flag that the tracks are fully propagated to PCA - o2::gpu::gpustd::array mPropFailed{}; // Flag that some propagation failed for this PCA candidate + std::array mOrigTrPtr; + std::array mTrAux; // Aux track info for each track at each cand. vertex + CrossInfo mCrossings; // info on track crossing + + std::array mTrcEInv; // errors for each track at each cand. vertex + std::array mCandTr; // tracks at each cond. vertex (Note: Errors are at seed XY point) + std::array mTrCFVT; // TrackCoefVtx for each track at each cand. vertex + std::array mTrDer; // Track derivativse + std::array mTrPos; // Track positions + std::array mTrRes; // Track residuals + std::array mPCA; // PCA for each vertex candidate + std::array mChi2 = {0}; // Chi2 at PCA candidate + std::array mNIters; // number of iterations for each seed + std::array mTrPropDone{}; // Flag that the tracks are fully propagated to PCA + std::array mPropFailed{}; // Flag that some propagation failed for this PCA candidate LogLogThrottler mLoggerBadCov{}; LogLogThrottler mLoggerBadInv{}; LogLogThrottler mLoggerBadProp{}; MatSym3D mWeightInv; // inverse weight of single track, [sum{M^T E M}]^-1 in EQ.T - o2::gpu::gpustd::array mOrder{0}; + std::array mOrder{0}; int mCurHyp = 0; int mCrossIDCur = 0; int mCrossIDAlt = -1; BadCovPolicy mBadCovPolicy{BadCovPolicy::Discard}; // what to do in case of non-pos-def. cov. 
matrix, see BadCovPolicy enum - o2::gpu::gpustd::array mFitStatus{}; // fit status of each hypothesis fit + std::array mFitStatus{}; // fit status of each hypothesis fit bool mAllowAltPreference = true; // if the fit converges to alternative PCA seed, abandon the current one bool mUseAbsDCA = false; // use abs. distance minimization rather than chi2 bool mWeightedFinalPCA = false; // recalculate PCA as a cov-matrix weighted mean, even if absDCA method was used @@ -657,7 +657,7 @@ template GPUd() void DCAFitterN::calcChi2Derivatives() { //< calculate 1st and 2nd derivatives of wighted DCA (chi2) over track parameters X, see EQ.Chi2 in the ref - o2::gpu::gpustd::array, N> covIDrDx; // tempory vectors of covI_j * dres_j/dx_i + std::array, N> covIDrDx; // tempory vectors of covI_j * dres_j/dx_i // chi2 1st derivative for (int i = N; i--;) { @@ -1175,13 +1175,13 @@ GPUd() o2::track::TrackParCov DCAFitterN::createParentTrackParCov(in { const auto& trP = getTrack(0, cand); const auto& trN = getTrack(1, cand); - o2::gpu::gpustd::array covV = {0.}; - o2::gpu::gpustd::array pvecV = {0.}; + std::array covV = {0.}; + std::array pvecV = {0.}; int q = 0; for (int it = 0; it < N; it++) { const auto& trc = getTrack(it, cand); - o2::gpu::gpustd::array pvecT = {0.}; - o2::gpu::gpustd::array covT = {0.}; + std::array pvecT = {0.}; + std::array covT = {0.}; trc.getPxPyPzGlo(pvecT); trc.getCovXYZPxPyPzGlo(covT); constexpr int MomInd[6] = {9, 13, 14, 18, 19, 20}; // cov matrix elements for momentum component @@ -1210,18 +1210,18 @@ GPUd() o2::track::TrackPar DCAFitterN::createParentTrackPar(int cand const auto& trP = getTrack(0, cand); const auto& trN = getTrack(1, cand); const auto& wvtx = getPCACandidate(cand); - o2::gpu::gpustd::array pvecV = {0.}; + std::array pvecV = {0.}; int q = 0; for (int it = 0; it < N; it++) { const auto& trc = getTrack(it, cand); - o2::gpu::gpustd::array pvecT = {0.}; + std::array pvecT = {0.}; trc.getPxPyPzGlo(pvecT); for (int i = 0; i < 3; i++) { pvecV[i] 
+= pvecT[i]; } q += trc.getCharge(); } - const o2::gpu::gpustd::array vertex = {(float)wvtx[0], (float)wvtx[1], (float)wvtx[2]}; + const std::array vertex = {(float)wvtx[0], (float)wvtx[1], (float)wvtx[2]}; return o2::track::TrackPar(vertex, pvecV, q, sectorAlpha); } diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 675719cfc0751..8158a93666a92 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -25,12 +25,12 @@ #define ALICEO2_SMATRIX_GPU_H #include "GPUCommonDef.h" -#include "GPUCommonArray.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" #include "GPUCommonLogger.h" #ifndef GPUCA_GPUCODE_DEVICE #include +#include #endif namespace o2::math_utils::detail @@ -283,14 +283,14 @@ struct make_indices : make_indices_impl<0, indices<>, N> { }; template -constexpr auto do_make(F f, indices) -> gpu::gpustd::array +constexpr auto do_make(F f, indices) -> std::array { - gpu::gpustd::array retarr = {f(I0 + I)...}; + std::array retarr = {f(I0 + I)...}; return retarr; } template -constexpr auto make(F f) -> gpu::gpustd::array +constexpr auto make(F f) -> std::array { return do_make(f, typename make_indices::type()); } diff --git a/Common/MathUtils/include/MathUtils/detail/basicMath.h b/Common/MathUtils/include/MathUtils/detail/basicMath.h index 3565764435a68..1abe6ee878c39 100644 --- a/Common/MathUtils/include/MathUtils/detail/basicMath.h +++ b/Common/MathUtils/include/MathUtils/detail/basicMath.h @@ -16,14 +16,15 @@ #ifndef MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ #define MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ +#include "GPUCommonDef.h" +#include "GPUCommonMath.h" +#include "CommonConstants/MathConstants.h" + #ifndef GPUCA_GPUCODE_DEVICE #include #include +#include #endif -#include "GPUCommonArray.h" -#include "GPUCommonDef.h" -#include "GPUCommonMath.h" -#include "CommonConstants/MathConstants.h" namespace o2 { @@ 
-130,4 +131,4 @@ GPUdi() double log(double x) } // namespace math_utils } // namespace o2 -#endif /* MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ */ \ No newline at end of file +#endif /* MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ */ diff --git a/Common/MathUtils/include/MathUtils/detail/trigonometric.h b/Common/MathUtils/include/MathUtils/detail/trigonometric.h index 462affdceb17f..457210202ca54 100644 --- a/Common/MathUtils/include/MathUtils/detail/trigonometric.h +++ b/Common/MathUtils/include/MathUtils/detail/trigonometric.h @@ -16,16 +16,17 @@ #ifndef MATHUTILS_INCLUDE_MATHUTILS_DETAIL_TRIGONOMETRIC_H_ #define MATHUTILS_INCLUDE_MATHUTILS_DETAIL_TRIGONOMETRIC_H_ -#ifndef GPUCA_GPUCODE_DEVICE -#include -#include -#endif -#include "GPUCommonArray.h" #include "GPUCommonDef.h" #include "GPUCommonMath.h" #include "CommonConstants/MathConstants.h" #include "MathUtils/detail/basicMath.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#include +#include +#endif + namespace o2 { namespace math_utils @@ -156,7 +157,7 @@ GPUhdi() std::tuple rotateZInv(T xG, T yG, T snAlp, T csAlp) #endif template -GPUhdi() void rotateZ(gpu::gpustd::array& xy, T alpha) +GPUhdi() void rotateZ(std::array& xy, T alpha) { // transforms vector in tracking frame alpha to global frame T sin, cos; diff --git a/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h b/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h index e9931b89ecd4a..06d4fba51bd54 100644 --- a/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h +++ b/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h @@ -170,14 +170,14 @@ class TrackITSExt : public TrackITS using TrackITS::TrackITS; // inherit base constructors GPUh() TrackITSExt(o2::track::TrackParCov&& parCov, short ncl, float chi2, - o2::track::TrackParCov&& outer, o2::gpu::gpustd::array cls) + o2::track::TrackParCov&& outer, std::array cls) : TrackITS(parCov, chi2, outer), mIndex{cls} { setNumberOfClusters(ncl); } 
GPUh() TrackITSExt(o2::track::TrackParCov& parCov, short ncl, float chi2, std::uint32_t rof, - o2::track::TrackParCov& outer, o2::gpu::gpustd::array cls) + o2::track::TrackParCov& outer, std::array cls) : TrackITS(parCov, chi2, outer), mIndex{cls} { setNumberOfClusters(ncl); @@ -205,13 +205,13 @@ class TrackITSExt : public TrackITS mIndex[layer] = idx; } - GPUh() o2::gpu::gpustd::array& getClusterIndexes() + GPUh() std::array& getClusterIndexes() { return mIndex; } private: - o2::gpu::gpustd::array mIndex = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; ///< Indices of associated clusters + std::array mIndex = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; ///< Indices of associated clusters ClassDefNV(TrackITSExt, 2); }; } // namespace its diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h index 922470f8992f5..6eb41b798e101 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h @@ -14,10 +14,10 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #ifndef GPUCA_GPUCODE_DEVICE #include +#include #endif /// \author ruben.shahoyan@cern.ch @@ -67,7 +67,7 @@ class DCA private: float mY = 0.f; float mZ = 0.f; - gpu::gpustd::array mCov; ///< s2y, syz, s2z + std::array mCov; ///< s2y, syz, s2z ClassDefNV(DCA, 1); }; diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index bfd56eb8f024f..f240e34861eeb 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,7 +29,6 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include 
"GPUCommonMath.h" -#include "GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE @@ -39,6 +38,7 @@ #include #include #include +#include #endif #ifndef GPUCA_ALIGPUCODE // Used only by functions that are hidden on the GPU @@ -128,9 +128,9 @@ class TrackParametrization public: using value_t = value_T; - using dim2_t = gpu::gpustd::array; - using dim3_t = gpu::gpustd::array; - using params_t = gpu::gpustd::array; + using dim2_t = std::array; + using dim3_t = std::array; + using params_t = std::array; struct yzerr_t { // 2 measurement with error dim2_t yz; @@ -209,7 +209,7 @@ class TrackParametrization GPUd() math_utils::Point3D getXYZGlo() const; GPUd() void getXYZGlo(dim3_t& xyz) const; GPUd() bool getPxPyPzGlo(dim3_t& pxyz) const; - GPUd() bool getPosDirGlo(gpu::gpustd::array& posdirp) const; + GPUd() bool getPosDirGlo(std::array& posdirp) const; // methods for track params estimate at other point GPUd() bool getYZAt(value_t xk, value_t b, value_t& y, value_t& z) const; diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h index cc783298e14cd..cd9d1517a81b1 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h @@ -38,14 +38,14 @@ class TrackParametrizationWithError : public TrackParametrization static_assert(std::is_floating_point_v); #endif - using covMat_t = gpu::gpustd::array; + using covMat_t = std::array; using MatrixDSym5 = o2::math_utils::SMatrix>; using MatrixD5 = o2::math_utils::SMatrix>; GPUhd() TrackParametrizationWithError(); GPUd() TrackParametrizationWithError(value_t x, value_t alpha, const params_t& par, const covMat_t& cov, int charge = 1, const PID pid = PID::Pion); GPUd() TrackParametrizationWithError(const dim3_t& xyz, const 
dim3_t& pxpypz, - const gpu::gpustd::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); + const std::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); GPUhdDefault() TrackParametrizationWithError(const TrackParametrizationWithError& src) = default; GPUdDefault() TrackParametrizationWithError(TrackParametrizationWithError&& src) = default; @@ -57,7 +57,7 @@ class TrackParametrizationWithError : public TrackParametrization using TrackParametrization::set; GPUd() void set(value_t x, value_t alpha, const params_t& par, const covMat_t& cov, int charge = 1, const PID pid = PID::Pion); GPUd() void set(value_t x, value_t alpha, const value_t* par, const value_t* cov, int charge = 1, const PID pid = PID::Pion); - GPUd() void set(const dim3_t& xyz, const dim3_t& pxpypz, const gpu::gpustd::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); + GPUd() void set(const dim3_t& xyz, const dim3_t& pxpypz, const std::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); GPUd() const covMat_t& getCov() const; GPUd() value_t getSigmaY2() const; GPUd() value_t getSigmaZY() const; @@ -77,7 +77,7 @@ class TrackParametrizationWithError : public TrackParametrization GPUd() value_t getCovarElem(int i, int j) const; GPUd() value_t getDiagError2(int i) const; - GPUd() bool getCovXYZPxPyPzGlo(gpu::gpustd::array& c) const; + GPUd() bool getCovXYZPxPyPzGlo(std::array& c) const; GPUd() void print() const; GPUd() void printHexadecimal(); diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h index 0ee0ca4461ab0..8a79130d64eda 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h @@ -18,9 +18,9 @@ #define INCLUDE_RECONSTRUCTIONDATAFORMATS_TRACKUTILS_H_ #include "GPUCommonRtypes.h" -#include 
"GPUCommonArray.h" #ifndef GPUCA_GPUCODE_DEVICE +#include #include #endif @@ -39,11 +39,11 @@ template GPUd() value_T BetheBlochSolidOpt(value_T bg); template -GPUd() void g3helx3(value_T qfield, value_T step, gpu::gpustd::array& vect); +GPUd() void g3helx3(value_T qfield, value_T step, std::array& vect); //____________________________________________________ template -GPUd() void g3helx3(value_T qfield, value_T step, gpu::gpustd::array& vect) +GPUd() void g3helx3(value_T qfield, value_T step, std::array& vect) { /****************************************************************** * * diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h index d14bc7ac786c8..fc89f162a0727 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h @@ -14,7 +14,6 @@ #include "GPUCommonDef.h" #include "GPUCommonMath.h" -#include "GPUCommonArray.h" #include #include "CommonDataFormat/TimeStamp.h" @@ -22,6 +21,7 @@ #include #include #include +#include #endif namespace o2 @@ -42,7 +42,7 @@ class VertexBase static constexpr int kNCov = 6; GPUhdDefault() VertexBase() = default; GPUhdDefault() ~VertexBase() = default; - GPUhd() VertexBase(const math_utils::Point3D& pos, const gpu::gpustd::array& cov) : mPos(pos), mCov(cov) + GPUhd() VertexBase(const math_utils::Point3D& pos, const std::array& cov) : mPos(pos), mCov(cov) { } @@ -65,7 +65,7 @@ class VertexBase GPUd() float getSigmaY() const { return gpu::CAMath::Sqrt(getSigmaY2()); } GPUd() float getSigmaZ() const { return gpu::CAMath::Sqrt(getSigmaZ2()); } - GPUd() const gpu::gpustd::array& getCov() const { return mCov; } + GPUd() const std::array& getCov() const { return mCov; } GPUd() math_utils::Point3D getXYZ() const { return mPos; } GPUd() math_utils::Point3D& getXYZ() { return mPos; } @@ -101,14 +101,14 @@ class VertexBase setSigmaXZ(sxz); 
setSigmaYZ(syz); } - GPUd() void setCov(const gpu::gpustd::array& cov) { mCov = cov; } + GPUd() void setCov(const std::array& cov) { mCov = cov; } bool operator==(const VertexBase& other) const; bool operator!=(const VertexBase& other) const { return !(*this == other); } protected: math_utils::Point3D mPos{0., 0., 0.}; ///< cartesian position - gpu::gpustd::array mCov{}; ///< errors, see CovElems enum + std::array mCov{}; ///< errors, see CovElems enum ClassDefNV(VertexBase, 1); }; @@ -130,7 +130,7 @@ class Vertex : public VertexBase GPUhdDefault() Vertex() = default; GPUhdDefault() ~Vertex() = default; - GPUhd() Vertex(const math_utils::Point3D& pos, const gpu::gpustd::array& cov, ushort nCont, float chi2) + GPUhd() Vertex(const math_utils::Point3D& pos, const std::array& cov, ushort nCont, float chi2) : VertexBase(pos, cov), mChi2(chi2), mNContributors(nCont) { } diff --git a/DataFormats/Reconstruction/src/TrackParametrization.cxx b/DataFormats/Reconstruction/src/TrackParametrization.cxx index 4b68ea425bfbd..1bdf9b55a60a0 100644 --- a/DataFormats/Reconstruction/src/TrackParametrization.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrization.cxx @@ -130,7 +130,7 @@ GPUd() bool TrackParametrization::getPxPyPzGlo(dim3_t& pxyz) const //____________________________________________________ template -GPUd() bool TrackParametrization::getPosDirGlo(gpu::gpustd::array& posdirp) const +GPUd() bool TrackParametrization::getPosDirGlo(std::array& posdirp) const { // fill vector with lab x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha value_t ptI = getPtInv(); @@ -231,7 +231,7 @@ GPUd() bool TrackParametrization::propagateParamTo(value_t xk, const di step *= gpu::CAMath::Sqrt(1.f + getTgl() * getTgl()); // // get the track x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha in the Global System - gpu::gpustd::array vecLab{0.f}; + std::array vecLab{0.f}; if (!getPosDirGlo(vecLab)) { return false; } @@ -250,7 +250,7 @@ GPUd() bool TrackParametrization::propagateParamTo(value_t xk, const di 
costet = b[2] / bb; sintet = bt / bb; } - gpu::gpustd::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], + std::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], -sinphi * vecLab[0] + cosphi * vecLab[1], sintet * cosphi * vecLab[0] + sintet * sinphi * vecLab[1] + costet * vecLab[2], costet * cosphi * vecLab[3] + costet * sinphi * vecLab[4] - sintet * vecLab[5], diff --git a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx index 81963adf79938..aee24238f1247 100644 --- a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx @@ -259,7 +259,7 @@ GPUd() bool TrackParametrizationWithError::propagateToDCA(const o2::dat //______________________________________________________________ template GPUd() TrackParametrizationWithError::TrackParametrizationWithError(const dim3_t& xyz, const dim3_t& pxpypz, - const gpu::gpustd::array& cv, int charge, bool sectorAlpha, const PID pid) + const std::array& cv, int charge, bool sectorAlpha, const PID pid) { // construct track param and covariance from kinematics and lab errors set(xyz, pxpypz, cv, charge, sectorAlpha, pid); @@ -268,7 +268,7 @@ GPUd() TrackParametrizationWithError::TrackParametrizationWithError(con //______________________________________________________________ template GPUd() void TrackParametrizationWithError::set(const dim3_t& xyz, const dim3_t& pxpypz, - const gpu::gpustd::array& cv, int charge, bool sectorAlpha, const PID pid) + const std::array& cv, int charge, bool sectorAlpha, const PID pid) { // set track param and covariance from kinematics and lab errors @@ -475,7 +475,7 @@ GPUd() bool TrackParametrizationWithError::propagateTo(value_t xk, cons step *= gpu::CAMath::Sqrt(1.f + this->getTgl() * this->getTgl()); // // get the track 
x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha in the Global System - gpu::gpustd::array vecLab{0.f}; + std::array vecLab{0.f}; if (!this->getPosDirGlo(vecLab)) { return false; } @@ -542,7 +542,7 @@ GPUd() bool TrackParametrizationWithError::propagateTo(value_t xk, cons costet = b[2] / bb; sintet = bt / bb; } - gpu::gpustd::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], + std::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], -sinphi * vecLab[0] + cosphi * vecLab[1], sintet * cosphi * vecLab[0] + sintet * sinphi * vecLab[1] + costet * vecLab[2], costet * cosphi * vecLab[3] + costet * sinphi * vecLab[4] - sintet * vecLab[5], @@ -1115,7 +1115,7 @@ GPUd() bool TrackParametrizationWithError::correctForMaterial(value_t x //______________________________________________________________ template -GPUd() bool TrackParametrizationWithError::getCovXYZPxPyPzGlo(gpu::gpustd::array& cv) const +GPUd() bool TrackParametrizationWithError::getCovXYZPxPyPzGlo(std::array& cv) const { //--------------------------------------------------------------------- // This function returns the global covariance matrix of the track params diff --git a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx index 2b133770357fc..8fb2db9c3c9a6 100644 --- a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx +++ b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx @@ -2615,7 +2615,7 @@ AODProducerWorkflowDPL::TrackQA AODProducerWorkflowDPL::processBarrelTrackQA(int o2::track::TrackParametrization tpcTMP = tpcOrig; /// get backup of the track const o2::base::Propagator::MatCorrType mMatType = o2::base::Propagator::MatCorrType::USEMatCorrLUT; /// should be parameterized const o2::dataformats::VertexBase v = mVtx.getMeanVertex(collisionID < 0 ? 
0.f : data.getPrimaryVertex(collisionID).getZ()); - o2::gpu::gpustd::array dcaInfo{-999., -999.}; + std::array dcaInfo{-999., -999.}; if (prop->propagateToDCABxByBz({v.getX(), v.getY(), v.getZ()}, tpcTMP, 2.f, mMatType, &dcaInfo)) { trackQAHolder.tpcdcaR = 100. * dcaInfo[0] / sqrt(1. + trackPar.getQ2Pt() * trackPar.getQ2Pt()); trackQAHolder.tpcdcaZ = 100. * dcaInfo[1] / sqrt(1. + trackPar.getQ2Pt() * trackPar.getQ2Pt()); diff --git a/Detectors/Align/src/AlignableDetectorTPC.cxx b/Detectors/Align/src/AlignableDetectorTPC.cxx index f66d9e3f3ab95..b3d2102559974 100644 --- a/Detectors/Align/src/AlignableDetectorTPC.cxx +++ b/Detectors/Align/src/AlignableDetectorTPC.cxx @@ -214,8 +214,8 @@ int AlignableDetectorTPC::processPoints(GIndex gid, int npntCut, bool inv) auto* sectSensor = (AlignableSensorTPC*)getSensor(currentSector); const auto* sysE = sectSensor->getAddError(); // additional syst error - gpu::gpustd::array p = {y, z}; - gpu::gpustd::array c = {0, 0, 0}; + std::array p = {y, z}; + std::array c = {0, 0, 0}; mController->getTPCParam()->GetClusterErrors2(sector, currentRow, z, trkParam.getSnp(), trkParam.getTgl(), -1.f, 0.f, 0.f, c[0], c[2]); // TODO: Note this disables occupancy / charge components of the error estimation mController->getTPCParam()->UpdateClusterError2ByState(clusterState, c[0], c[2]); int nrComb = std::abs(row - currentRow) + 1; diff --git a/Detectors/Base/include/DetectorsBase/Propagator.h b/Detectors/Base/include/DetectorsBase/Propagator.h index a9e2ce6e0383d..dbdef47e4edc0 100644 --- a/Detectors/Base/include/DetectorsBase/Propagator.h +++ b/Detectors/Base/include/DetectorsBase/Propagator.h @@ -17,7 +17,6 @@ #define ALICEO2_BASE_PROPAGATOR_ #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #include "CommonConstants/PhysicsConstants.h" #include "ReconstructionDataFormats/Track.h" #include "ReconstructionDataFormats/DCA.h" @@ -25,6 +24,7 @@ #include "DetectorsBase/MatLayerCylSet.h" #ifndef GPUCA_GPUCODE +#include #include #endif @@ 
-111,12 +111,12 @@ class PropagatorImpl GPUd() bool propagateToDCA(const o2::math_utils::Point3D& vtx, o2::track::TrackParametrization& track, value_type bZ, value_type maxStep = MAX_STEP, MatCorrType matCorr = MatCorrType::USEMatCorrLUT, - gpu::gpustd::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, + std::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, int signCorr = 0, value_type maxD = 999.f) const; GPUd() bool propagateToDCABxByBz(const o2::math_utils::Point3D& vtx, o2::track::TrackParametrization& track, value_type maxStep = MAX_STEP, MatCorrType matCorr = MatCorrType::USEMatCorrLUT, - gpu::gpustd::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, + std::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, int signCorr = 0, value_type maxD = 999.f) const; PropagatorImpl(PropagatorImpl const&) = delete; diff --git a/Detectors/Base/src/Propagator.cxx b/Detectors/Base/src/Propagator.cxx index 754c0c14e6f60..1c44cea65c69c 100644 --- a/Detectors/Base/src/Propagator.cxx +++ b/Detectors/Base/src/Propagator.cxx @@ -170,7 +170,7 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackParCov_t& track, va signCorr = -dir; // sign of eloss correction is not imposed } - gpu::gpustd::array b{}; + std::array b{}; while (math_utils::detail::abs(dx) > Epsilon) { auto step = math_utils::detail::min(math_utils::detail::abs(dx), maxStep); if (dir < 0) { @@ -239,7 +239,7 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackPar_t& track, value signCorr = -dir; // sign of eloss correction is not imposed } - gpu::gpustd::array b{}; + std::array b{}; while (math_utils::detail::abs(dx) > Epsilon) { auto step = math_utils::detail::min(math_utils::detail::abs(dx), maxStep); if (dir < 0) { @@ -553,7 +553,7 @@ GPUd() bool PropagatorImpl::propagateToDCABxByBz(const o2::dataformats: template GPUd() bool PropagatorImpl::propagateToDCA(const math_utils::Point3D& vtx, TrackPar_t& track, value_type bZ, value_type maxStep, 
PropagatorImpl::MatCorrType matCorr, - gpu::gpustd::array* dca, track::TrackLTIntegral* tofInfo, + std::array* dca, track::TrackLTIntegral* tofInfo, int signCorr, value_type maxD) const { // propagate track to DCA to the vertex @@ -601,7 +601,7 @@ GPUd() bool PropagatorImpl::propagateToDCA(const math_utils::Point3D GPUd() bool PropagatorImpl::propagateToDCABxByBz(const math_utils::Point3D& vtx, TrackPar_t& track, value_type maxStep, PropagatorImpl::MatCorrType matCorr, - gpu::gpustd::array* dca, track::TrackLTIntegral* tofInfo, + std::array* dca, track::TrackLTIntegral* tofInfo, int signCorr, value_type maxD) const { // propagate track to DCA to the vertex diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx index bca1ec1e85001..494603641cde5 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx @@ -593,7 +593,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) LOGP(info, "--------------- getDCAClusterTrackMC"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); auto bz = o2::base::Propagator::Instance()->getNominalBz(); @@ -833,7 +833,7 @@ void EfficiencyStudy::countDuplicatedAfterCuts() LOGP(info, "--------------- countDuplicatedAfterCuts"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -1019,7 +1019,7 @@ void EfficiencyStudy::studyDCAcutsMC() // if not, keep it as a fake match -> increase the fake match counter // the 
efficiency of each one will be match counter / total of the duplicated clusters o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -1346,7 +1346,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -2176,7 +2176,7 @@ void EfficiencyStudy::getEfficiency(bool isMC) LOGP(info, "getEfficiency()"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -2860,4 +2860,4 @@ DataProcessorSpec getEfficiencyStudy(mask_t srcTracksMask, mask_t srcClustersMas Options{}}; } -} // namespace o2::its::study \ No newline at end of file +} // namespace o2::its::study diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx b/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx index 5ca1bf2bd5c8f..c0aaabddaca1b 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx @@ -358,7 +358,7 @@ void ImpactParameterStudy::process(o2::globaltracking::RecoContainer& recoData) auto trueID = trueVec_globID_contr[it]; const o2::track::TrackParCov& trc = recoData.getTrackParam(trueID); auto pt = trc.getPt(); - o2::gpu::gpustd::array dcaInfo{-999., -999.}; + std::array dcaInfo{-999., -999.}; // LOGP(info, " ---> 
Bz={}", o2::base::Propagator::Instance()->getNominalBz()); o2::track::TrackPar trcTmp{trc}; if (o2::base::Propagator::Instance()->propagateToDCABxByBz({Pvtx_refitted.getX(), Pvtx_refitted.getY(), Pvtx_refitted.getZ()}, trcTmp, 2.f, matCorr, &dcaInfo)) { diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h index 6ae042d081688..059b1cdc29082 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h @@ -22,8 +22,6 @@ #include "ITStrackingGPU/VertexerTraitsGPU.h" #include "ITStrackingGPU/TracerGPU.h" -#include "GPUCommonArray.h" - namespace o2::its::gpu { #ifdef GPUCA_GPUCODE // GPUg() global kernels must only when compiled by GPU compiler @@ -56,4 +54,4 @@ void trackletFinderHandler(const Cluster* clustersNextLayer, // 0 2 const float phiCut, const size_t maxTrackletsPerCluster = 1e2); } // namespace o2::its::gpu -#endif \ No newline at end of file +#endif diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu index 3aab0624ef556..acbd77585df37 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu @@ -564,7 +564,7 @@ GPUg() void computeVertexKernel( histZ[iBin] = 0; } if (sumWZ > minContributors || vertIndex == 0) { - new (vertices + vertIndex) Vertex{o2::math_utils::Point3D(beamPosition[0], beamPosition[1], wZ / sumWZ), o2::gpu::gpustd::array{ex, 0, ey, 0, 0, ez}, static_cast(sumWZ), 0}; + new (vertices + vertIndex) Vertex{o2::math_utils::Point3D(beamPosition[0], beamPosition[1], wZ / sumWZ), std::array{ex, 0, ey, 0, 0, ez}, static_cast(sumWZ), 0}; } else { new (vertices + vertIndex) Vertex{}; } @@ -577,4 +577,4 @@ GPUg() void computeVertexKernel( */ } // namespace gpu } // namespace its -} // namespace o2 \ No 
newline at end of file +} // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h index 0f136edfebfb3..2bf1316470316 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h @@ -16,15 +16,14 @@ #ifndef TRACKINGITSU_INCLUDE_CACLUSTER_H_ #define TRACKINGITSU_INCLUDE_CACLUSTER_H_ -#ifndef GPUCA_GPUCODE_DEVICE -#include -#endif - #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #include "ITStracking/Definitions.h" #include "ITStracking/MathUtils.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif + namespace o2 { namespace its @@ -61,15 +60,15 @@ GPUhdi() void Cluster::print() const struct TrackingFrameInfo { TrackingFrameInfo() = default; - TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, o2::gpu::gpustd::array&& posTF, o2::gpu::gpustd::array&& covTF); + TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, std::array&& posTF, std::array&& covTF); float xCoordinate; float yCoordinate; float zCoordinate; float xTrackingFrame; float alphaTrackingFrame; - o2::gpu::gpustd::array positionTrackingFrame = {-1., -1.}; - o2::gpu::gpustd::array covarianceTrackingFrame = {999., 999., 999.}; + std::array positionTrackingFrame = {-1., -1.}; + std::array covarianceTrackingFrame = {999., 999., 999.}; GPUdi() void print() const { #if !defined(GPUCA_GPUCODE_DEVICE) || (!defined(__OPENCL__) && defined(GPUCA_GPU_DEBUG_PRINT)) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h index da02149fbc432..ec075b0f10d04 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h @@ -16,17 +16,17 @@ #ifndef TRACKINGITSU_INCLUDE_CONSTANTS_H_ #define TRACKINGITSU_INCLUDE_CONSTANTS_H_ -#ifndef GPUCA_GPUCODE_DEVICE 
-#include -#include -#endif - #include "ITStracking/Definitions.h" #include "CommonConstants/MathConstants.h" #include "GPUCommonMath.h" #include "GPUCommonDef.h" -#include "GPUCommonArray.h" + +#ifndef GPUCA_GPUCODE_DEVICE +#include +#include +#include +#endif namespace o2 { @@ -54,9 +54,9 @@ constexpr int ClustersPerCell{3}; constexpr int UnusedIndex{-1}; constexpr float Resolution{0.0005f}; -GPUhdi() constexpr o2::gpu::gpustd::array VertexerHistogramVolume() +GPUhdi() constexpr std::array VertexerHistogramVolume() { - return o2::gpu::gpustd::array{{1.98, 1.98, 40.f}}; + return std::array{{1.98, 1.98, 40.f}}; } } // namespace its @@ -66,24 +66,24 @@ constexpr int LayersNumber{7}; constexpr int TrackletsPerRoad{LayersNumber - 1}; constexpr int CellsPerRoad{LayersNumber - 2}; -GPUhdi() constexpr o2::gpu::gpustd::array LayersZCoordinate() +GPUhdi() constexpr std::array LayersZCoordinate() { constexpr double s = 1.; // safety margin - return o2::gpu::gpustd::array{16.333f + s, 16.333f + s, 16.333f + s, 42.140f + s, 42.140f + s, 73.745f + s, 73.745f + s}; + return std::array{16.333f + s, 16.333f + s, 16.333f + s, 42.140f + s, 42.140f + s, 73.745f + s, 73.745f + s}; } -GPUhdi() constexpr o2::gpu::gpustd::array LayersRCoordinate() +GPUhdi() constexpr std::array LayersRCoordinate() { - return o2::gpu::gpustd::array{{2.33959f, 3.14076f, 3.91924f, 19.6213f, 24.5597f, 34.388f, 39.3329f}}; + return std::array{{2.33959f, 3.14076f, 3.91924f, 19.6213f, 24.5597f, 34.388f, 39.3329f}}; } constexpr int ZBins{256}; constexpr int PhiBins{128}; constexpr float InversePhiBinSize{PhiBins / constants::math::TwoPi}; -GPUhdi() constexpr o2::gpu::gpustd::array InverseZBinSize() +GPUhdi() constexpr std::array InverseZBinSize() { constexpr auto zSize = LayersZCoordinate(); - return o2::gpu::gpustd::array{0.5f * ZBins / (zSize[0]), 0.5f * ZBins / (zSize[1]), 0.5f * ZBins / (zSize[2]), + return std::array{0.5f * ZBins / (zSize[0]), 0.5f * ZBins / (zSize[1]), 0.5f * ZBins / (zSize[2]), 0.5f * 
ZBins / (zSize[3]), 0.5f * ZBins / (zSize[4]), 0.5f * ZBins / (zSize[5]), 0.5f * ZBins / (zSize[6])}; } diff --git a/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx b/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx index 630ad9acf59d2..1557c636e2345 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx @@ -17,7 +17,9 @@ #include "ITStracking/MathUtils.h" #include "ITStracking/IndexTableUtils.h" -#include "GPUCommonArray.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif namespace o2 { @@ -92,8 +94,8 @@ bool Cluster::operator==(const Cluster& rhs) const this->indexTableBinIndex == rhs.indexTableBinIndex; } -TrackingFrameInfo::TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, o2::gpu::gpustd::array&& posTF, - o2::gpu::gpustd::array&& covTF) +TrackingFrameInfo::TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, std::array&& posTF, + std::array&& covTF) : xCoordinate{x}, yCoordinate{y}, zCoordinate{z}, xTrackingFrame{xTF}, alphaTrackingFrame{alpha}, positionTrackingFrame{posTF}, covarianceTrackingFrame{covTF} { // Nothing to do diff --git a/Detectors/TPC/qc/src/Tracks.cxx b/Detectors/TPC/qc/src/Tracks.cxx index 8e6f0d702df1b..5f29e80c89d2e 100644 --- a/Detectors/TPC/qc/src/Tracks.cxx +++ b/Detectors/TPC/qc/src/Tracks.cxx @@ -13,6 +13,7 @@ #include #include +#include // root includes #include "TFile.h" @@ -21,7 +22,6 @@ // o2 includes #include "DataFormatsTPC/TrackTPC.h" #include "DataFormatsTPC/dEdxInfo.h" -#include "GPUCommonArray.h" #include "DetectorsBase/Propagator.h" #include "TPCQC/Tracks.h" #include "TPCQC/Helpers.h" @@ -179,7 +179,7 @@ bool Tracks::processTrack(const o2::tpc::TrackTPC& track) if (propagator->getMatLUT() && propagator->hasMagFieldSet()) { // ---| fill DCA histos |--- - o2::gpu::gpustd::array dca; + std::array dca; o2::track::TrackPar propTrack(track); if (propagator->propagateToDCABxByBz(mPositionOfPV, propTrack, 2.f, 
o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca)) { const auto phi = o2::math_utils::to02PiGen(track.getPhi()); @@ -348,4 +348,4 @@ void Tracks::dumpToFile(std::string_view filename) arr.Write(arr.GetName(), TObject::kSingleKey); } f->Close(); -} \ No newline at end of file +} diff --git a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx index 1329dea236b1f..33b9039298264 100644 --- a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx +++ b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx @@ -178,7 +178,7 @@ bool MIPTrackFilterDevice::acceptDCA(const TrackTPC& track) } auto propagator = o2::base::Propagator::Instance(); - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; o2::track::TrackPar propTrack(track); const auto ok = propagator->propagateToDCABxByBz(refPoint, propTrack, 2., o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca); diff --git a/Detectors/TPC/workflow/src/TPCRefitter.cxx b/Detectors/TPC/workflow/src/TPCRefitter.cxx index 3ebe32d12ddb8..b2e41c8e808da 100644 --- a/Detectors/TPC/workflow/src/TPCRefitter.cxx +++ b/Detectors/TPC/workflow/src/TPCRefitter.cxx @@ -421,7 +421,7 @@ void TPCRefitterSpec::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) bool TPCRefitterSpec::getDCAs(const o2::track::TrackPar& track, float& dcar, float& dcaz) { auto propagator = o2::base::Propagator::Instance(); - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; o2::track::TrackPar propTrack(track); const auto ok = propagator->propagateToDCABxByBz(refPoint, propTrack, 2., o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca); diff --git a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx index 4d20654d07c83..a9f1e7d71da8e 100644 --- a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx +++ b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx @@ -1143,7 +1143,7 @@ class 
TPCTimeSeries : public Task auto propagator = o2::base::Propagator::Instance(); // propagate track to DCA - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; // coarse propagation @@ -1252,7 +1252,7 @@ class TPCTimeSeries : public Task // make propagation for ITS-TPC Track // check if the track was assigned to ITS track - o2::gpu::gpustd::array dcaITSTPC{0, 0}; + std::array dcaITSTPC{0, 0}; float deltaP0 = -999; float deltaP1 = -999; float deltaP2 = -999; @@ -1270,7 +1270,7 @@ class TPCTimeSeries : public Task // store TPC only DCAs // propagate to vertex in case the track belongs to vertex const bool contributeToVertex = (idxITSTPC.back() != -1); - o2::gpu::gpustd::array dcaITSTPCTmp{-1, -1}; + std::array dcaITSTPCTmp{-1, -1}; if (contributeToVertex) { if (propagator->propagateToDCA(vertex.getXYZ(), trackITSTPCTmp, propagator->getNominalBz(), mFineStep, mMatType, &dcaITSTPCTmp)) { @@ -1279,7 +1279,7 @@ class TPCTimeSeries : public Task } // propagate TPC track to vertex - o2::gpu::gpustd::array dcaTPCTmp{-1, -1}; + std::array dcaTPCTmp{-1, -1}; if (propagator->propagateToDCA(vertex.getXYZ(), track, propagator->getNominalBz(), mFineStep, mMatType, &dcaTPCTmp)) { dcaTPCAtVertex = dcaTPCTmp[0]; } @@ -1401,7 +1401,7 @@ class TPCTimeSeries : public Task const bool contributeToVertex = (idxITSTPC.back() != -1); if (hasITSTPC && contributeToVertex) { o2::track::TrackParCov trackITSTPCTmp = tracksITSTPC[idxITSTPC.front()]; - o2::gpu::gpustd::array dcaITSTPCTmp{-1, -1}; + std::array dcaITSTPCTmp{-1, -1}; if (propagator->propagateToDCA(vertex.getXYZ(), trackITSTPCTmp, propagator->getNominalBz(), mFineStep, mMatType, &dcaITSTPCTmp)) { o2::track::TrackParCov trackTPC = tracksTPC[iTrk]; if (trackTPC.rotate(trackITSTPCTmp.getAlpha()) && propagator->propagateTo(trackTPC, trackITSTPCTmp.getX(), false, mMaxSnp, mFineStep, mMatType)) { diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index 
c9babbf5548b4..e83ca8c4a69fc 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -15,12 +15,14 @@ #ifndef GPUCOMMONARRAY_H #define GPUCOMMONARRAY_H -#ifndef GPUCA_GPUCODE_DEVICE +#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) // TODO: Get rid of GPUCommonArray once OpenCL supports +#ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif +#else #include "GPUCommonDef.h" -namespace o2::gpu::gpustd +namespace std { #ifdef GPUCA_GPUCODE_DEVICE template @@ -43,5 +45,7 @@ GPUd() array(T, E...)->array; template using array = std::array; #endif -} // namespace o2::gpu::gpustd -#endif \ No newline at end of file +} // namespace std +#endif + +#endif // GPUCOMMONARRAY_H diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index f837019c11875..a51a4ac50683f 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -17,12 +17,11 @@ #include "GPUCommonDef.h" -#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) +#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) // TODO: Get rid of GPUCommonTypeTraits once OpenCL supports #ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif #else -// We just reimplement some type traits in std for the GPU // TODO: Check if meanwhile we can get rid of GPUCommonTypeTraits and GPUCommonArray, and just use the std headers. 
namespace std { template diff --git a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h index 6598085d309c7..08f9be7d98380 100644 --- a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h +++ b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h @@ -21,5 +21,6 @@ #include #include #include +#include #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 3f58c0fea75e9..ea74c43703597 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -72,6 +72,7 @@ typedef signed char int8_t; #include "GPUCommonDef.h" #include "GPUCommonTypeTraits.h" // TODO: Once possible in OpenCL, should use GPUStdSystemHeaders.h here +#include "GPUCommonArray.h" // TODO: Same #include "GPUConstantMem.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index b74c1bb6a4534..7c7ce8d07ffb9 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -387,8 +387,8 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov TrackParCovChi2 = 0.f; } CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", "", trk.getAlpha(), x, trk.getParams()[0], trk.getParams()[1], trk.getParams()[4], trk.getParams()[4], trk.getParams()[2], trk.getParams()[2], trk.getParams()[0] - y, trk.getParams()[1] - z, sqrtf(trk.getCov()[0]), sqrtf(trk.getCov()[2]), sqrtf(trk.getCov()[5]), sqrtf(trk.getCov()[14]), trk.getCov()[10])); - gpu::gpustd::array p = {y, z}; - gpu::gpustd::array c = {0, 0, 0}; + std::array p = {y, z}; + std::array c = {0, 0, 0}; GPUTPCGMPropagator::GetErr2(c[0], c[2], *mPparam, getPar(trk)[2], getPar(trk)[3], z, x, y, currentRow, 
clusterState, sector, time, invAvgCharge, invCharge, false); TrackParCovChi2 += trk.getPredictedChi2(p, c); if (!trk.update(p, c)) { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index f6b8bea29822a..d26d46495069b 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -59,8 +59,8 @@ class propagatorInterface GPUdi() bool update(const float p[2], const float cov[3]) { if (mParam) { - gpustd::array pTmp = {p[0], p[1]}; - gpustd::array covTmp = {cov[0], cov[1], cov[2]}; + std::array pTmp = {p[0], p[1]}; + std::array covTmp = {cov[0], cov[1], cov[2]}; return mParam->update(pTmp, covTmp); } else { return false; @@ -69,8 +69,8 @@ class propagatorInterface GPUdi() float getPredictedChi2(const float p[2], const float cov[3]) { if (mParam) { - gpustd::array pTmp = {p[0], p[1]}; - gpustd::array covTmp = {cov[0], cov[1], cov[2]}; + std::array pTmp = {p[0], p[1]}; + std::array covTmp = {cov[0], cov[1], cov[2]}; return mParam->getPredictedChi2(pTmp, covTmp); } else { return 99999.f; From a850e9eb3e6a634a1e87a70170c05ad6d8bce3af Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 22:18:30 +0200 Subject: [PATCH 0367/1764] GPU Common: Workaround for removing gpustd::array, temporary alias for O2Physics --- .../ReconstructionDataFormats/TrackParametrization.h | 1 + GPU/Common/GPUCommonArray.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index f240e34861eeb..1d0a5f1a9f1fd 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,6 +29,7 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include "GPUCommonMath.h" +#include 
"GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index e83ca8c4a69fc..fa86d7bb4a021 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -48,4 +48,10 @@ using array = std::array; } // namespace std #endif +namespace o2::gpu::gpustd +{ +template +using array = ::std::array; // temporary alias, to remove dependent types +} // o2::gpu::gpustd + #endif // GPUCOMMONARRAY_H From b3c66e8a48c77a932a914a5adcb3562ebef3487a Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 16 May 2025 09:53:57 +0200 Subject: [PATCH 0368/1764] DPL: Drop obsolete API (#14279) The new plugin based mechanism does not need the bulk insertion anymore. --- .../Core/include/Framework/TableBuilder.h | 106 +----------------- .../Core/test/benchmark_TableBuilder.cxx | 33 ------ Framework/Core/test/test_TableBuilder.cxx | 34 ------ 3 files changed, 4 insertions(+), 169 deletions(-) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 8d7601cefc634..0b35d5be083e4 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -42,12 +42,6 @@ class Table; class Array; } // namespace arrow -template -struct BulkInfo { - const T ptr; - size_t size; -}; - extern template class arrow::NumericBuilder; extern template class arrow::NumericBuilder; extern template class arrow::NumericBuilder; @@ -200,34 +194,6 @@ struct BuilderUtils { } } - template - static arrow::Status bulkAppend(HolderType& holder, size_t bulkSize, const PTR ptr) - { - return holder.builder->AppendValues(ptr, bulkSize, nullptr); - } - - template - static arrow::Status bulkAppendChunked(HolderType& holder, BulkInfo info) - { - // Appending nullptr is a no-op. 
- if (info.ptr == nullptr) { - return arrow::Status::OK(); - } - if constexpr (std::is_same_v>) { - if (appendToList>(holder.builder, info.ptr, info.size).ok() == false) { - throw runtime_error("Unable to append to column"); - } else { - return arrow::Status::OK(); - } - } else { - if (holder.builder->AppendValues(info.ptr, info.size, nullptr).ok() == false) { - throw runtime_error("Unable to append to column"); - } else { - return arrow::Status::OK(); - } - } - } - template static arrow::Status append(HolderType& holder, std::pair ip) { @@ -518,14 +484,6 @@ struct TableBuilderHelpers { return {BuilderTraits::make_datatype()...}; } - template - static std::vector> makeFields(std::array const& names) - { - char const* const* names_ptr = names.data(); - return { - std::make_shared(*names_ptr++, BuilderMaker::make_datatype(), true, nullptr)...}; - } - /// Invokes the append method for each entry in the tuple template static bool append(std::tuple& holders, VALUES&& values) @@ -542,19 +500,6 @@ struct TableBuilderHelpers { (BuilderUtils::unsafeAppend(std::get(holders), std::get(values)), ...); } - template - static bool bulkAppend(std::tuple& holders, size_t bulkSize, PTRS ptrs) - { - return (BuilderUtils::bulkAppend(std::get(holders), bulkSize, std::get(ptrs)).ok() && ...); - } - - /// Return true if all columns are done. 
- template - static bool bulkAppendChunked(std::tuple& holders, INFOS infos) - { - return (BuilderUtils::bulkAppendChunked(std::get(holders), std::get(infos)).ok() && ...); - } - /// Invokes the append method for each entry in the tuple template static bool finalize(std::vector>& arrays, std::tuple& holders) @@ -575,15 +520,9 @@ constexpr auto tuple_to_pack(std::tuple&&) return framework::pack{}; } -template -concept BulkInsertable = (std::integral> && !std::same_as>); - template struct InsertionTrait { - static consteval DirectInsertion policy() - requires(!BulkInsertable); - static consteval CachedInsertion policy() - requires(BulkInsertable); + static consteval DirectInsertion policy(); using Policy = decltype(policy()); }; @@ -658,7 +597,9 @@ class TableBuilder template auto makeBuilders(std::array const& columnNames, size_t nRows) { - mSchema = std::make_shared(TableBuilderHelpers::makeFields(columnNames)); + char const* const* names_ptr = columnNames.data(); + mSchema = std::make_shared( + std::vector>({std::make_shared(*names_ptr++, BuilderMaker::make_datatype(), true, nullptr)...})); mHolders = makeHolders(mMemoryPool, nRows); mFinalizer = [](std::vector>& arrays, void* holders) -> bool { @@ -768,45 +709,6 @@ class TableBuilder }(typename T::table_t::persistent_columns_t{}); } - template - auto preallocatedPersist(std::array const& columnNames, int nRows) - { - constexpr size_t nColumns = NCOLUMNS; - validate(); - mArrays.resize(nColumns); - makeBuilders(columnNames, nRows); - - // Callback used to fill the builders - return [holders = mHolders](unsigned int /*slot*/, typename BuilderMaker::FillType... 
args) -> void { - TableBuilderHelpers::unsafeAppend(*(HoldersTupleIndexed*)holders, std::forward_as_tuple(args...)); - }; - } - - template - auto bulkPersist(std::array const& columnNames, size_t nRows) - { - validate(); - // Should not be called more than once - mArrays.resize(NCOLUMNS); - makeBuilders(columnNames, nRows); - - return [holders = mHolders](unsigned int /*slot*/, size_t batchSize, typename BuilderMaker::FillType const*... args) -> void { - TableBuilderHelpers::bulkAppend(*(HoldersTupleIndexed*)holders, batchSize, std::forward_as_tuple(args...)); - }; - } - - template - auto bulkPersistChunked(std::array const& columnNames, size_t nRows) - { - validate(); - mArrays.resize(NCOLUMNS); - makeBuilders(columnNames, nRows); - - return [holders = mHolders](unsigned int /*slot*/, BulkInfo::STLValueType const*>... args) -> bool { - return TableBuilderHelpers::bulkAppendChunked(*(HoldersTupleIndexed*)holders, std::forward_as_tuple(args...)); - }; - } - /// Reserve method to expand the columns as needed. 
template auto reserveArrays(std::tuple& holders, int s) diff --git a/Framework/Core/test/benchmark_TableBuilder.cxx b/Framework/Core/test/benchmark_TableBuilder.cxx index 59d1450e895bd..5b9dee866c8a3 100644 --- a/Framework/Core/test/benchmark_TableBuilder.cxx +++ b/Framework/Core/test/benchmark_TableBuilder.cxx @@ -62,39 +62,6 @@ static void BM_TableBuilderScalarReserved(benchmark::State& state) BENCHMARK(BM_TableBuilderScalarReserved)->Arg(1 << 21); BENCHMARK(BM_TableBuilderScalarReserved)->Range(8, 8 << 16); -static void BM_TableBuilderScalarPresized(benchmark::State& state) -{ - using namespace o2::framework; - for (auto _ : state) { - TableBuilder builder; - auto rowWriter = builder.preallocatedPersist({"x"}, state.range(0)); - for (auto i = 0; i < state.range(0); ++i) { - rowWriter(0, 0.f); - } - auto table = builder.finalize(); - } -} - -BENCHMARK(BM_TableBuilderScalarPresized)->Arg(1 << 20); -BENCHMARK(BM_TableBuilderScalarPresized)->Range(8, 8 << 16); - -static void BM_TableBuilderScalarBulk(benchmark::State& state) -{ - using namespace o2::framework; - auto chunkSize = state.range(0) / 256; - std::vector buffer(chunkSize, 0.); // We assume data is chunked in blocks 256th of the total size - for (auto _ : state) { - TableBuilder builder; - auto bulkWriter = builder.bulkPersist({"x"}, state.range(0)); - for (auto i = 0; i < state.range(0) / chunkSize; ++i) { - bulkWriter(0, chunkSize, buffer.data()); - } - auto table = builder.finalize(); - } -} - -BENCHMARK(BM_TableBuilderScalarBulk)->Range(256, 1 << 20); - static void BM_TableBuilderSimple(benchmark::State& state) { using namespace o2::framework; diff --git a/Framework/Core/test/test_TableBuilder.cxx b/Framework/Core/test/test_TableBuilder.cxx index b08fee5ad4e6a..00cbbbc59b725 100644 --- a/Framework/Core/test/test_TableBuilder.cxx +++ b/Framework/Core/test/test_TableBuilder.cxx @@ -162,30 +162,6 @@ TEST_CASE("TestTableBuilderStruct") } } -TEST_CASE("TestTableBuilderBulk") -{ - using namespace 
o2::framework; - TableBuilder builder; - auto bulkWriter = builder.bulkPersist({"x", "y"}, 10); - int x[] = {0, 1, 2, 3, 4, 5, 6, 7}; - int y[] = {0, 1, 2, 3, 4, 5, 6, 7}; - - bulkWriter(0, 8, x, y); - - auto table = builder.finalize(); - REQUIRE(table->num_columns() == 2); - REQUIRE(table->num_rows() == 8); - REQUIRE(table->schema()->field(0)->name() == "x"); - REQUIRE(table->schema()->field(1)->name() == "y"); - REQUIRE(table->schema()->field(0)->type()->id() == arrow::int32()->id()); - REQUIRE(table->schema()->field(1)->type()->id() == arrow::int32()->id()); - - for (int64_t i = 0; i < 8; ++i) { - auto p = std::dynamic_pointer_cast>(table->column(0)->chunk(0)); - REQUIRE(p->Value(i) == i); - } -} - TEST_CASE("TestTableBuilderMore") { using namespace o2::framework; @@ -288,13 +264,3 @@ TEST_CASE("TestColumnCount") int count2 = TableBuilder::countColumns(); REQUIRE(count2 == 3); } - -TEST_CASE("TestMakeFields") -{ - auto fields = TableBuilderHelpers::makeFields({"i", "f"}); - REQUIRE(fields.size() == 2); - REQUIRE(fields[0]->name() == "i"); - REQUIRE(fields[1]->name() == "f"); - REQUIRE(fields[0]->type()->name() == "int32"); - REQUIRE(fields[1]->type()->name() == "float"); -} From d89ef683194da14326ccc2915810cf402d6921d7 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 15 May 2025 23:29:48 +0200 Subject: [PATCH 0369/1764] Promote ITS/MFT wrong orbit/ROF count messages to critical --- Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx | 2 +- Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx b/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx index a9ed2748ec004..187a1bc114ee7 100644 --- a/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx +++ b/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx @@ -125,7 +125,7 @@ bool RUDecodeData::checkLinkInSync(int icab, const o2::InteractionRecord ir) 
link->statistics.errorCounts[GBTLinkDecodingStat::ErrOldROF]++; linkHBFToDump[(uint64_t(link->subSpec) << 32) + link->hbfEntry] = link->irHBF.orbit; if (link->needToPrintError(link->statistics.errorCounts[GBTLinkDecodingStat::ErrOldROF]) && !ROFRampUpStage) { - LOGP(error, "{} (cable {}) has IR={} for current majority IR={} -> {}", link->describe(), + LOGP(critical, "{} (cable {}) has IR={} for current majority IR={} -> {}", link->describe(), cableHWID[icab], link->ir.asString(), ir.asString(), link->statistics.ErrNames[GBTLinkDecodingStat::ErrOldROF]); } #endif diff --git a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx index 7042cb7433ac5..da1af34376ff1 100644 --- a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx +++ b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx @@ -202,7 +202,7 @@ void STFDecoder::run(ProcessingContext& pc) if ((expectedTFSize != nTriggersProcessed) && mROFErrRepIntervalMS > 0 && mTFCounter > 1 && nTriggersProcessed > 0) { long currTS = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); if (currTS - lastErrReportTS > mROFErrRepIntervalMS) { - LOGP(error, "Inconsistent number of ROF per TF. From parameters: {} from readout: {} (muting further reporting for {} ms)", expectedTFSize, nTriggersProcessed, mROFErrRepIntervalMS); + LOGP(critical, "Inconsistent number of ROF per TF. 
From parameters: {} from readout: {} (muting further reporting for {} ms)", expectedTFSize, nTriggersProcessed, mROFErrRepIntervalMS); lastErrReportTS = currTS; } } From 5b5f1d96e60af52e9bf70796b23e1a4f5f0c544c Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Fri, 16 May 2025 16:09:42 +0200 Subject: [PATCH 0370/1764] Include/adapt QED for special runs (#14242) * Include QED for special runs --- Generators/include/Generators/QEDGenParam.h | 11 ++++++----- Generators/share/external/QEDepem.C | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Generators/include/Generators/QEDGenParam.h b/Generators/include/Generators/QEDGenParam.h index 1c78b14cfc516..0a79f113fc4ac 100644 --- a/Generators/include/Generators/QEDGenParam.h +++ b/Generators/include/Generators/QEDGenParam.h @@ -24,11 +24,12 @@ namespace o2 namespace eventgen { struct QEDGenParam : public o2::conf::ConfigurableParamHelper { - - float yMin = -6.f; ///< min Y - float yMax = 6.f; ///< max Y - float ptMin = 0.4e-3; ///< min pT - float ptMax = 10.f; ///< min pT + float yMin = -6.f; ///< min Y + float yMax = 6.f; ///< max Y + float ptMin = 0.4e-3; ///< min pT + float ptMax = 10.f; ///< min pT + float cmEnergy = 5360.f; ///< center of mass energy per nucleon pair in GeV + float Z = 82.f; ///< atomic number of the projectile/target (only symmetric systems are compatible for now) // float xSectionQED = -1; ///< estimated QED x-section in barns float xSectionHad = 8.; ///< reference hadronic x-section for the same system diff --git a/Generators/share/external/QEDepem.C b/Generators/share/external/QEDepem.C index 1e464ec69be00..d9103d1476df0 100644 --- a/Generators/share/external/QEDepem.C +++ b/Generators/share/external/QEDepem.C @@ -35,6 +35,8 @@ o2::eventgen::GeneratorTGenerator* QEDepem() genBg->SetPtRange(qedParam.ptMin, qedParam.ptMax); // Set pt limits (GeV) for e+-: 1MeV corresponds to max R=13.3mm at 5kGaus genBg->SetOrigin(diamond.position[0], diamond.position[1], diamond.position[2]); 
// vertex position in space genBg->SetSigma(diamond.width[0], diamond.width[1], diamond.width[2]); // vertex sigma + genBg->SetCMEnergy(qedParam.cmEnergy); // center of mass energy per nucleon pair in GeV + genBg->SetZ(qedParam.Z); // atomic number of the projectile/target (only symmetric systems are compatible for now) genBg->SetTimeOrigin(0.); // vertex position in time initialized = genBg->Init(); if (!initialized) { From 64315adcaddfc72a231bc8c15427b03d6f239e87 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 16 May 2025 15:43:44 +0200 Subject: [PATCH 0371/1764] GPU: Check that we do not optimize RTC when we have 2 different GPUReconstruction instances --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 9 ++++++++- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 3ef995b9f9561..c76bf11c3e25d 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -347,15 +347,22 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings->nTPCClustererLanes = GPUCA_NSECTORS; } + if (GetProcessingSettings().doublePipeline) { + mProcessingSettings->rtctech.allowOptimizedSlaveReconstruction = true; + } if (GetProcessingSettings().doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { GPUError("Must use double pipeline mode only with exactly one chain that must support it"); return 1; } - if (mMaster == nullptr && GetProcessingSettings().doublePipeline) { mPipelineContext.reset(new GPUReconstructionPipelineContext); } + if (mMaster && GetProcessingSettings().rtc.enable && (GetProcessingSettings().rtc.optConstexpr || GetProcessingSettings().rtc.optSpecialCode) && 
!GetProcessingSettings().rtctech.allowOptimizedSlaveReconstruction) { + GPUError("Not allowed to create optimized RTC code with more than one GPUReconstruction instances"); + return 1; + } + mDeviceMemorySize = mHostMemorySize = 0; for (uint32_t i = 0; i < mChains.size(); i++) { if (mChains[i]->EarlyConfigure()) { diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9400a429fca81..b9be1db881816 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -228,6 +228,7 @@ AddOption(runTest, int32_t, 0, "", 0, "Do not run the actual benchmark, but just AddOption(cacheMutex, bool, true, "", 0, "Use a file lock to serialize access to the cache folder") AddOption(ignoreCacheValid, bool, false, "", 0, "If set, allows to use RTC cached code files even if they are not valid for the current source code / parameters") AddOption(printLaunchBounds, bool, false, "", 0, "Print launch bounds used for RTC code as debugging option") +AddOption(allowOptimizedSlaveReconstruction, bool, false, "", 0, "Allow RTC with slave GPUReconstruction instances with optConstexpr and optSpecialcode") AddOption(cacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored") AddOption(prependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(overrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") // Part of cmdLine, so checked against the cache From dc60e3535f333042ef6dcb665cf4c3cfb5d95cfa Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 16 May 2025 13:22:19 +0200 Subject: [PATCH 0372/1764] By default process K0s only in the sync. 
svertexer To rever to the full mode use define SECVTXK0ONLY=0 --- .../include/DetectorsVertexing/SVertexHypothesis.h | 4 ++-- prodtests/full-system-test/dpl-workflow.sh | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h b/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h index 1450e0c15e98c..c3fd74aa7eeff 100644 --- a/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h +++ b/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h @@ -60,7 +60,7 @@ class SVertexHypothesis bool check(float p2Pos, float p2Neg, float p2V0, float ptV0) const { // check if given mass and pt is matching to hypothesis - return check(calcMass(p2Pos, p2Neg, p2V0), ptV0); + return mPars[SigmaM] > 0 && check(calcMass(p2Pos, p2Neg, p2V0), ptV0); } bool check(float mass, float pt) const { // check if given mass and pt is matching to hypothesis @@ -151,7 +151,7 @@ class SVertex3Hypothesis bool check(float p2Pos, float p2Neg, float p2Bach, float p2Tot, float ptV0) const { // check if given mass and pt is matching to hypothesis - return check(calcMass(p2Pos, p2Neg, p2Bach, p2Tot), ptV0); + return mPars[SigmaM] > 0 && check(calcMass(p2Pos, p2Neg, p2Bach, p2Tot), ptV0); } bool check(float mass, float pt) const diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index b51594115154c..4e6cbbebe7db7 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -94,6 +94,7 @@ TPC_CORR_OPT= TPC_CORR_KEY= INTERACTION_TAG_CONFIG_KEY= EVE_OPT=" --jsons-folder $EDJSONS_DIR" +: ${SECVTXK0ONLY:=} : ${EVE_CONFIG:=} : ${STRTRACKING:=} : ${ITSEXTRAERR:=} @@ -591,7 +592,14 @@ has_detector_reco ITS && has_detector_gpu ITS TPC && [[ -z "$DISABLE_ROOT_OUTPUT has_detector_matching PRIMVTX && [[ ! 
-z "$VERTEXING_SOURCES" ]] && [[ $GLOBAL_READER_NEEDS_PV != 1 ]] && add_W o2-primary-vertexing-workflow "$DISABLE_MC $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $PVERTEX_CONFIG --pipeline $(get_N primary-vertexing MATCH REST 1 PRIMVTX),$(get_N pvertex-track-matching MATCH REST 1 PRIMVTXMATCH)" "${PVERTEXING_CONFIG_KEY};${INTERACTION_TAG_CONFIG_KEY};" if [[ $BEAMTYPE != "cosmic" ]] && has_detectors_reco ITS && has_detector_matching SECVTX && [[ ! -z "$SVERTEXING_SOURCES" ]]; then - [[ $GLOBAL_READER_NEEDS_SV != 1 ]] && add_W o2-secondary-vertexing-workflow "$DISABLE_MC $STRTRACKING $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $TPC_CORR_OPT --vertexing-sources $SVERTEXING_SOURCES --threads $SVERTEX_THREADS --pipeline $(get_N secondary-vertexing MATCH REST $SVERTEX_THREADS SECVTX)" "$TPC_CORR_KEY" + : ${REDUCESV_OPT:=} + : ${REDUCESV_CONF:=} + if [[ $SYNCMODE == 1 ]] && [[ $SECVTXK0ONLY != 0 ]] ; then + : ${STRTRACKING:=" --disable-strangeness-tracker "} + : ${REDUCESV_OPT:=" --disable-cascade-finder --disable-3body-finder "} + : ${REDUCESV_CONF:="svertexer.pidCutsPhoton[0]=-1;svertexer.pidCutsLambda[0]=-1;svertexer.pidCutsHTriton[0]=-1;svertexer.pidCutsHhydrog4[0]=-1;"} + fi + [[ $GLOBAL_READER_NEEDS_SV != 1 ]] && add_W o2-secondary-vertexing-workflow "$DISABLE_MC $STRTRACKING $REDUCESV_OPT $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $TPC_CORR_OPT --vertexing-sources $SVERTEXING_SOURCES --threads $SVERTEX_THREADS --pipeline $(get_N secondary-vertexing MATCH REST $SVERTEX_THREADS SECVTX)" "$TPC_CORR_KEY;$REDUCESV_CONF" SECTVTX_ON="1" else SECTVTX_ON="0" From 966097b751dced62af09972912f8711dac66b7e4 Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 16 May 2025 17:37:22 +0200 Subject: [PATCH 0373/1764] Possibility to define AlignParam as Local Delta The constructors got extra argument convertLocalToGlobal (by default true) which tells that if the provided parameters correspond to the local delta (i.e. argument global == false), then it should be converted to global delta. 
Otherwise, the delta being local or global is fully defined by the value of the global argument. The AlignParam::applyToGeometry() accounts for the type of persistent type of delta and avoids extra global -> local conversion if mIsGlobalDelta is false. --- .../DetectorsCommonDataFormats/AlignParam.h | 17 +++++-- .../Detectors/Common/src/AlignParam.cxx | 46 +++++++++++-------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h b/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h index c4e702c6ae27e..a93a37c1e36ab 100644 --- a/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h +++ b/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h @@ -37,9 +37,12 @@ class AlignParam AlignParam(const char* symname, int algID, // volume symbolic name and its alignable ID double x, double y, double z, // delta translation double psi, double theta, double phi, // delta rotation - bool global = true); // global (preferable) or local delta definition + bool global = true, // global (preferable) or local delta definition + bool convertLocalToGlobal = true); // if local is provided, convert it to global - AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global = true); + AlignParam(const char* symname, int algID, TGeoMatrix& m, + bool global = true, // global (preferable) or local delta definition + bool convertLocalToGlobal = true); // if local is provided, convert it to global /// return symbolic name of the volume const std::string& getSymName() const { return mSymName; } @@ -70,6 +73,9 @@ class AlignParam void setAlignableID(int id) { mAlignableID = id; } /// ================ methods for direct setting of delta params + /// set parameters + void setParams(double x, double y, double z, double psi, double theta, double phi); + /// set parameters of global delta void setGlobalParams(double x, double y, double 
z, double psi, double theta, double phi); @@ -114,6 +120,9 @@ class AlignParam int rectify(double zero = 1e-13); + bool isGlobal() const { return mIsGlobal; } + void setIsGlobal(bool v) { mIsGlobal = v; } + protected: bool matrixToAngles(const double* rot, double& psi, double& theta, double& phi) const; void anglesToMatrix(double psi, double theta, double phi, double* rot) const; @@ -123,8 +132,8 @@ class AlignParam private: std::string mSymName{}; + bool mIsGlobal = true; /// is this global delta? int mAlignableID = -1; /// alignable ID (set for sensors only) - double mX = 0.; ///< X translation of global delta double mY = 0.; ///< Y translation of global delta double mZ = 0.; ///< Z translation of global delta @@ -133,7 +142,7 @@ class AlignParam double mTheta = 0.; ///< "roll" : Euler angle of rotation around Y axis after 1st rotation (radians) double mPhi = 0.; ///< "yaw" : Euler angle of rotation around Z axis (radians) - ClassDefNV(AlignParam, 1); + ClassDefNV(AlignParam, 2); }; } // namespace detectors diff --git a/DataFormats/Detectors/Common/src/AlignParam.cxx b/DataFormats/Detectors/Common/src/AlignParam.cxx index 90f2a349607a1..f20cf3dac4971 100644 --- a/DataFormats/Detectors/Common/src/AlignParam.cxx +++ b/DataFormats/Detectors/Common/src/AlignParam.cxx @@ -26,8 +26,9 @@ using namespace o2::detectors; AlignParam::AlignParam(const char* symname, int algID, // volume symbolic name and its alignable ID double x, double y, double z, // delta translation double psi, double theta, double phi, // delta rotation - bool global) // global (preferable) or local delta definition - : mSymName(symname), mAlignableID(algID) + bool global, // global (preferable) or local delta definition + bool convertLocalToGlobal) // if local is provided, convert it to global + : mSymName(symname), mIsGlobal(global || convertLocalToGlobal), mAlignableID(algID) { /// standard constructor with 3 translation + 3 rotation parameters /// If the user explicitly sets the global variable to 
false then the @@ -35,23 +36,22 @@ AlignParam::AlignParam(const char* symname, int algID, // volume symbolic /// This requires to have a gGeoMenager active instance, otherwise the /// constructor will fail (no object created) - if (global) { - setGlobalParams(x, y, z, psi, theta, phi); - } else { + setParams(x, y, z, psi, theta, phi); + if (!global && convertLocalToGlobal) { setLocalParams(x, y, z, psi, theta, phi); } } //___________________________________________________ -AlignParam::AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global) - : mSymName(symname), mAlignableID(algID) +AlignParam::AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global, bool convertLocalToGlobal) + : mSymName(symname), mIsGlobal(global || convertLocalToGlobal), mAlignableID(algID) { setTranslation(m); if (!setRotation(m)) { const double* rot = m.GetRotationMatrix(); throw std::runtime_error(fmt::format("Failed to extract roll-pitch-yall angles from [[{},{},{}], [{},{},{}], [{},{},{}] for {}", rot[0], rot[1], rot[2], rot[3], rot[4], rot[5], rot[6], rot[7], rot[8], symname)); } - if (!global && !setLocalParams(mX, mY, mZ, mPsi, mTheta, mPhi)) { + if (!global && convertLocalToGlobal && !setLocalParams(mX, mY, mZ, mPsi, mTheta, mPhi)) { throw std::runtime_error(fmt::format("Alignment creation for {} failed: geomManager is absent", symname)); } } @@ -223,6 +223,10 @@ bool AlignParam::createLocalMatrix(TGeoHMatrix& m) const // In case that the TGeo was not initialized or not closed, // returns false and the object parameters are not set. // + m = createMatrix(); + if (!mIsGlobal) { + return true; + } if (!gGeoManager || !gGeoManager->IsClosed()) { LOG(error) << "Can't get the local alignment object parameters! 
gGeoManager doesn't exist or it is still open!"; return false; @@ -247,7 +251,6 @@ bool AlignParam::createLocalMatrix(TGeoHMatrix& m) const LOG(error) << "Volume name or path " << symname << " is not valid!"; return false; } - m = createMatrix(); TGeoHMatrix gprime, gprimeinv; gprime = *node->GetMatrix(); gprimeinv = gprime.Inverse(); @@ -302,18 +305,15 @@ bool AlignParam::applyToGeometry() const } // double threshold = 0.001; - - TGeoHMatrix gprime = *node->GetMatrix(); - TGeoHMatrix align = createMatrix(); - gprime.MultiplyLeft(&align); - TGeoHMatrix* ginv = new TGeoHMatrix; // TGeoPhysicalNode takes and manages raw pointer, need naked new! - TGeoHMatrix* g = node->GetMatrix(node->GetLevel() - 1); - *ginv = g->Inverse(); - *ginv *= gprime; - + TGeoHMatrix* align = new TGeoHMatrix(createMatrix()); + if (mIsGlobal) { + align->Multiply(node->GetMatrix()); + TGeoHMatrix* g = node->GetMatrix(node->GetLevel() - 1); + align->MultiplyLeft(node->GetMatrix(node->GetLevel() - 1)->Inverse()); + } LOG(debug) << "Aligning volume " << symname; - node->Align(ginv); + node->Align(align); return true; } @@ -359,6 +359,14 @@ void AlignParam::setGlobalParams(double x, double y, double z, double psi, doubl setRotation(psi, theta, phi); } +//_____________________________________________________________________________ +void AlignParam::setParams(double x, double y, double z, double psi, double theta, double phi) +{ + /// set parameters of global delta + setTranslation(x, y, z); + setRotation(psi, theta, phi); +} + //_____________________________________________________________________________ void AlignParam::setRotation(double psi, double theta, double phi) { From a6af3174a7011e8a0620b71a14fee134275f3815 Mon Sep 17 00:00:00 2001 From: swenzel Date: Fri, 16 May 2025 22:13:05 +0200 Subject: [PATCH 0374/1764] TPC init: load more params from CCDB for GEMAmpl --- Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff 
--git a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx index a04f73a62fbf8..b111d8f372967 100644 --- a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx +++ b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx @@ -309,11 +309,16 @@ void initTPC(long timestamp) auto& cdb = o2::tpc::CDBInterface::instance(); cdb.setUseDefaults(); - // IMPORTANT: load ParameterGEM from CCDB + // IMPORTANT: load ParameterGEM, ParameterGas and CalPadGainFull from CCDB to correctly init GEMAmplification auto& ccdbManager = o2::ccdb::BasicCCDBManager::instance(); ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGEM), timestamp); - LOGP(info, "initTPC: TPC GEM param updated for time {}", timestamp); + LOGP(info, "initTPC: TPC GEM param, Gas param + CalPadGainFull updated for time {}", timestamp); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::CalPadGainFull), timestamp); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGas), timestamp); + o2::tpc::ParameterGEM::Instance().printKeyValues(true, true); + o2::tpc::ParameterGas::Instance().printKeyValues(true, true); + // by invoking this constructor we make sure that a common file will be created // in future we should take this from OCDB and just forward per message const static auto& ampl = o2::tpc::GEMAmplification::instance(); From 88baea9a23ba9f20083204b9ae46a63eaa19038b Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:06:23 +0200 Subject: [PATCH 0375/1764] DPL Analysis: `as(cfg)` for adjusting the types of configurables in expressions (#14276) --- .../Core/include/Framework/Expressions.h | 22 +++++++++++++------ Framework/Core/test/test_Expressions.cxx | 9 ++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index 9e00388ee5df8..18c930700a91d 100644 --- 
a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -12,13 +12,11 @@ #define O2_FRAMEWORK_EXPRESSIONS_H_ #include "Framework/BasicOps.h" -#include "Framework/CompilerBuiltins.h" #include "Framework/Pack.h" #include "Framework/Configurable.h" #include "Framework/Variant.h" #include "Framework/InitContext.h" #include "Framework/ConfigParamRegistry.h" -#include "Framework/RuntimeError.h" #include #include #include @@ -143,13 +141,17 @@ struct OpNode { /// A placeholder node for simple type configurable struct PlaceholderNode : LiteralNode { template + requires(variant_trait_v::type> != VariantType::Unknown) PlaceholderNode(Configurable const& v) : LiteralNode{v.value}, name{v.name} { - if constexpr (variant_trait_v::type> != VariantType::Unknown) { - retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{context.options().get(name)}; }; - } else { - unknownParameterUsed(name.c_str()); - } + retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{context.options().get(name)}; }; + } + + template + requires((std::convertible_to) && (variant_trait_v::type> != VariantType::Unknown)) + PlaceholderNode(Configurable const& v, AT*) : LiteralNode{static_cast(v.value)}, name{v.name} + { + retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{static_cast(context.options().get(name))}; }; } PlaceholderNode(PlaceholderNode const& other) = default; @@ -163,6 +165,12 @@ struct PlaceholderNode : LiteralNode { LiteralNode::var_t (*retrieve)(InitContext&, char const*); }; +template +PlaceholderNode as(Configurable const& v) +{ + return PlaceholderNode(v, (AT*)nullptr); +} + /// A placeholder node for parameters taken from an array struct ParameterNode : LiteralNode { ParameterNode(int index_ = -1) diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 2296b5dcbfbc4..6faa2fc352232 100644 --- 
a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -146,6 +146,15 @@ TEST_CASE("TestTreeParsing") REQUIRE(ptfilterspecs2[0].left == (DatumSpec{std::string{"fPt"}, typeid(o2::aod::track::Pt).hash_code(), atype::FLOAT})); REQUIRE(ptfilterspecs2[0].right == (DatumSpec{LiteralNode::var_t{1.0f}, atype::FLOAT})); REQUIRE(ptfilterspecs2[0].result == (DatumSpec{0u, atype::BOOL})); + + Configurable cvalue{"cvalue", 1, "test value"}; + Filter testFilter = o2::aod::track::tpcNClsShared < as(cvalue); + REQUIRE(testFilter.node->self.index() == 2); + REQUIRE(testFilter.node->left->self.index() == 1); + REQUIRE(testFilter.node->right->self.index() == 3); + REQUIRE(std::get(testFilter.node->right->self).name == "cvalue"); + auto testSpecs = createOperations(testFilter); + REQUIRE(testSpecs[0].right == (DatumSpec{LiteralNode::var_t{(uint8_t)1}, atype::UINT8})); } TEST_CASE("TestGandivaTreeCreation") From 4d20c8d9087a28bbd16984c36abef7b58b1f2802 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:06:56 +0200 Subject: [PATCH 0376/1764] DPL Analysis: improve/fix join error by not relying on metadata (#14249) --- Framework/Core/include/Framework/ASoA.h | 10 ++++++---- .../Core/include/Framework/AnalysisManagers.h | 8 ++++---- Framework/Core/include/Framework/AnalysisTask.h | 4 ++-- Framework/Core/include/Framework/TableBuilder.h | 7 ++++--- Framework/Core/src/ASoA.cxx | 7 ++----- Framework/Core/test/test_ASoA.cxx | 15 +++++++++++++++ 6 files changed, 33 insertions(+), 18 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index e098cd89f6d5d..c13cfe912c814 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1245,7 +1245,7 @@ struct TableIterator : IP, C... 
{ }; struct ArrowHelpers { - static std::shared_ptr joinTables(std::vector>&& tables); + static std::shared_ptr joinTables(std::vector>&& tables, std::span labels); static std::shared_ptr concatTables(std::vector>&& tables); }; @@ -1683,6 +1683,7 @@ class Table using table_t = self_t; static constexpr const auto originals = computeOriginals(); + static constexpr const auto originalLabels = [] refs, size_t... Is>(std::index_sequence) { return std::array{o2::aod::label()...}; }.template operator()(std::make_index_sequence()); template bindings> requires(ref.origin_hash == "CONC"_h) @@ -1931,7 +1932,7 @@ class Table Table(std::vector>&& tables, uint64_t offset = 0) requires(ref.origin_hash != "CONC"_h) - : Table(ArrowHelpers::joinTables(std::move(tables)), offset) + : Table(ArrowHelpers::joinTables(std::move(tables), std::span{originalLabels}), offset) { } @@ -3213,7 +3214,7 @@ struct JoinFull : Table, D, o2::aod::Hash<"JOIN"_h>, Ts. bindInternalIndicesTo(this); } JoinFull(std::vector>&& tables, uint64_t offset = 0) - : base{ArrowHelpers::joinTables(std::move(tables)), offset} + : base{ArrowHelpers::joinTables(std::move(tables), std::span{base::originalLabels}), offset} { bindInternalIndicesTo(this); } @@ -3223,6 +3224,7 @@ struct JoinFull : Table, D, o2::aod::Hash<"JOIN"_h>, Ts. using self_t = JoinFull; using table_t = base; static constexpr const auto originals = base::originals; + static constexpr const auto originalLabels = base::originalLabels; using columns_t = typename table_t::columns_t; using persistent_columns_t = typename table_t::persistent_columns_t; using iterator = table_t::template iterator_template; @@ -3293,7 +3295,7 @@ using Join = JoinFull, Ts...>; template constexpr auto join(Ts const&... 
t) { - return Join(ArrowHelpers::joinTables({t.asArrowTable()...})); + return Join(ArrowHelpers::joinTables({t.asArrowTable()...}, std::span{Join::base::originalLabels})); } template diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 330eaf01f0be4..b76d88ea5ee66 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -282,14 +282,14 @@ template bool prepareOutput(ProcessingContext& context, T& spawns) { using metadata = o2::aod::MetadataTrait>::metadata; - auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context)); + auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); if (originalTable->schema()->fields().empty() == true) { using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } spawns.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), spawns.projector)); - spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable})); + spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } @@ -304,14 +304,14 @@ template bool prepareOutput(ProcessingContext& context, T& defines) { using metadata = o2::aod::MetadataTrait>::metadata; - auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context)); + auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); if (originalTable->schema()->fields().empty() == true) { using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } defines.extension = std::make_shared(o2::framework::spawner>(originalTable, 
o2::aod::label(), defines.projectors.data(), defines.projector)); - defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable})); + defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index c7f3da1948c62..257a5358463c6 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -201,9 +201,9 @@ struct AnalysisDataProcessorBuilder { std::shared_ptr table = nullptr; auto joiner = [&record] refs, size_t... Is>(std::index_sequence) { return std::vector{extractTableFromRecord(record)...}; }; if constexpr (soa::is_iterator) { - table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence())); + table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence()), std::span{T::parent_t::originalLabels}); } else { - table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence())); + table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence()), std::span{T::originalLabels}); } expressions::updateFilterInfo(info, table); if constexpr (!o2::soa::is_smallgroups>) { diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 0b35d5be083e4..e2d12789ef922 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -771,7 +771,7 @@ template auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) { using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - auto fullTable = 
soa::ArrowHelpers::joinTables(std::move(tables)); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } @@ -798,7 +798,7 @@ template auto spawner(std::vector>&& tables, const char* name, std::shared_ptr& projector) { using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables)); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } @@ -834,7 +834,8 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, s template auto spawner(framework::pack columns, std::vector>&& tables, const char* name, std::shared_ptr& projector) { - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables)); + std::array labels{"original"}; + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index 810398747de88..bd6ca551d24ec 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -64,7 +64,7 @@ SelectionVector sliceSelection(gsl::span const& mSelectedRows, in return slicedSelection; } -std::shared_ptr ArrowHelpers::joinTables(std::vector>&& tables) +std::shared_ptr ArrowHelpers::joinTables(std::vector>&& tables, std::span labels) { if (tables.size() == 1) { return tables[0]; @@ -72,10 +72,7 @@ std::shared_ptr ArrowHelpers::joinTables(std::vectornum_rows() != tables[i + 1]->num_rows()) { throw o2::framework::runtime_error_f("Tables %s and %s have different sizes (%d vs %d) and cannot be joined!", - 
tables[i]->schema()->metadata()->Get("label").ValueOrDie().c_str(), - tables[i + 1]->schema()->metadata()->Get("label").ValueOrDie().c_str(), - tables[i]->num_rows(), - tables[i + 1]->num_rows()); + labels[i], labels[i + 1], tables[i]->num_rows(), tables[i + 1]->num_rows()); } } std::vector> fields; diff --git a/Framework/Core/test/test_ASoA.cxx b/Framework/Core/test/test_ASoA.cxx index 6d9ee16fec84d..80519aebc9ee7 100644 --- a/Framework/Core/test/test_ASoA.cxx +++ b/Framework/Core/test/test_ASoA.cxx @@ -31,6 +31,7 @@ namespace test DECLARE_SOA_COLUMN(X, x, int); DECLARE_SOA_COLUMN(Y, y, int); DECLARE_SOA_COLUMN(Z, z, int); +DECLARE_SOA_COLUMN(W, w, int); DECLARE_SOA_DYNAMIC_COLUMN(Sum, sum, [](int x, int y) { return x + y; }); DECLARE_SOA_EXPRESSION_COLUMN(ESum, esum, int, test::x + test::y); } // namespace test @@ -268,9 +269,17 @@ TEST_CASE("TestJoinedTables") rowWriterZ(0, 8); auto tableZ = builderZ.finalize(); + TableBuilder builderW; + auto rowWriterW = builderW.persist({"fW"}); + rowWriterW(0, 8); + rowWriterW(0, 8); + rowWriterW(0, 8); + auto tableW = builderW.finalize(); + using TestX = InPlaceTable<"A0"_h, o2::aod::test::X>; using TestY = InPlaceTable<"A1"_h, o2::aod::test::Y>; using TestZ = InPlaceTable<"A2"_h, o2::aod::test::Z>; + using TestW = InPlaceTable<"A3"_h, o2::aod::test::W>; using Test = Join; REQUIRE(Test::contains()); @@ -303,6 +312,12 @@ TEST_CASE("TestJoinedTables") for (auto& test : tests4) { REQUIRE(15 == test.x() + test.y() + test.z()); } + + try { + auto testF = join(TestZ{tableZ}, TestW{tableW}); + } catch (RuntimeErrorRef ref) { + REQUIRE(std::string{error_from_ref(ref).what} == "Tables TEST and TEST have different sizes (8 vs 3) and cannot be joined!"); + } } TEST_CASE("TestConcatTables") From 35e208b13fb4c30c38ff4b3d05535fb370b53c0a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:07:42 +0200 Subject: [PATCH 0377/1764] DPL Analysis: prevent slice cache from updating unnecessarily (#14257) * Cache setup now only 
happens after init when process configurables' values are final * Add inline contrained functions to avoid using "overloaded" * add error messages for unexpected situations --- Framework/Core/include/Framework/ASoA.h | 12 +-- .../Core/include/Framework/AnalysisManagers.h | 14 ++- .../Core/include/Framework/AnalysisTask.h | 102 ++++++++++++------ .../Framework/ArrowTableSlicingCache.h | 45 +++++--- .../Core/include/Framework/GroupSlicer.h | 2 +- Framework/Core/src/ASoA.cxx | 2 +- Framework/Core/src/ArrowSupport.cxx | 19 ++-- Framework/Core/src/ArrowTableSlicingCache.cxx | 60 +++++++---- Framework/Core/test/test_GroupSlicer.cxx | 4 +- 9 files changed, 167 insertions(+), 93 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index c13cfe912c814..4ed8e830fe137 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1400,10 +1400,10 @@ namespace o2::framework struct PreslicePolicyBase { const std::string binding; - StringPair bindingKey; + Entry bindingKey; bool isMissing() const; - StringPair const& getBindingKey() const; + Entry const& getBindingKey() const; }; struct PreslicePolicySorted : public PreslicePolicyBase { @@ -1428,7 +1428,7 @@ struct PresliceBase : public Policy { const std::string binding; PresliceBase(expressions::BindingNode index_) - : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, std::make_pair(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} + : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, Entry(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} { } @@ -1508,7 +1508,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + 
missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; @@ -1545,7 +1545,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } auto selection = container.getSliceFor(value); @@ -1574,7 +1574,7 @@ auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index b76d88ea5ee66..1d894b2b67948 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -534,39 +534,43 @@ static void setGroupedCombination(C& comb, TG& grouping, std::tuple& asso /// Preslice handling template requires(!is_preslice) -bool registerCache(T&, std::vector&, std::vector&) +bool registerCache(T&, Cache&, Cache&) { return false; } template requires std::same_as -bool registerCache(T& preslice, std::vector& bsks, std::vector&) +bool registerCache(T& preslice, Cache& bsks, Cache&) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsks.end()) { bsks.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + 
locate->enabled = true; } return true; } template requires std::same_as -bool registerCache(T& preslice, std::vector&, std::vector& bsksU) +bool registerCache(T& preslice, Cache&, Cache& bsksU) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsksU.end()) { bsksU.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + locate->enabled = true; } return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index 257a5358463c6..b53929f326712 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -65,21 +65,18 @@ concept is_enumeration = is_enumeration_v>; // the contents of an AnalysisTask... 
namespace { struct AnalysisDataProcessorBuilder { - template - static void addGroupingCandidates(std::vector& bk, std::vector& bku) + template + static void addGroupingCandidates(Cache& bk, Cache& bku, bool enabled) { - [&bk, &bku](framework::pack) mutable { - std::string key; - if constexpr (soa::is_iterator>) { - key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); - } - ([&bk, &bku, &key]() mutable { + [&bk, &bku, enabled](framework::pack) mutable { + auto key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); + ([&bk, &bku, &key, enabled]() mutable { if constexpr (soa::relatedByIndex, std::decay_t>()) { auto binding = soa::getLabelFromTypeForKey>(key); if constexpr (o2::soa::is_smallgroups>) { - framework::updatePairList(bku, binding, key); + framework::updatePairList(bku, binding, key, enabled); } else { - framework::updatePairList(bk, binding, key); + framework::updatePairList(bk, binding, key, enabled); } } }(), @@ -145,34 +142,72 @@ struct AnalysisDataProcessorBuilder { } /// helper to parse the process arguments + template + inline static bool requestInputsFromArgs(T&, std::string const&, std::vector&, std::vector&) + { + return false; + } + template + inline static bool requestInputsFromArgs(T& pc, std::string const& name, std::vector& inputs, std::vector& eis) + { + AnalysisDataProcessorBuilder::inputsFromArgs(pc.process, (name + "/" + pc.name).c_str(), pc.value, inputs, eis); + return true; + } + template + inline static bool requestCacheFromArgs(T&, Cache&, Cache&) + { + return false; + } + template + inline static bool requestCacheFromArgs(T& pc, Cache& bk, Cache& bku) + { + AnalysisDataProcessorBuilder::cacheFromArgs(pc.process, pc.value, bk, bku); + return true; + } /// 1. 
enumeration (must be the only argument) template - static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&) //, Cache&, Cache&) { std::vector inputMetadata; // FIXME: for the moment we do not support begin, end and step. DataSpecUtils::updateInputList(inputs, InputSpec{"enumeration", "DPL", "ENUM", 0, Lifetime::Enumeration, inputMetadata}); } - /// 2. grouping case - 1st argument is an iterator + /// 2. 1st argument is an iterator template - static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) + static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos) //, Cache& bk, Cache& bku) requires(std::is_lvalue_reference_v && (std::is_lvalue_reference_v && ...)) { - addGroupingCandidates(bk, bku); constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions::parent_t, Args...>(hash, name, value, inputs, eInfos); } /// 3. generic case template - static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos) //, Cache&, Cache&) requires(std::is_lvalue_reference_v && ...) { constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions(hash, name, value, inputs, eInfos); } + /// 1. enumeration (no grouping) + template + static void cacheFromArgs(R (C::*)(A), bool, Cache&, Cache&) + { + } + /// 2. 
iterator (the only grouping case) + template + static void cacheFromArgs(R (C::*)(A, Args...), bool value, Cache& bk, Cache& bku) + { + addGroupingCandidates(bk, bku, value); + } + /// 3. generic case (no grouping) + template + static void cacheFromArgs(R (C::*)(A, Args...), bool, Cache&, Cache&) + { + } + template static auto extractTableFromRecord(InputRecord& record) { @@ -480,8 +515,6 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) std::vector inputs; std::vector options; std::vector expressionInfos; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; /// make sure options and configurables are set before expression infos are created homogeneous_apply_refs([&options, &hash](auto& element) { return analysis_task_parsers::appendOption(options, element); }, *task.get()); @@ -490,23 +523,15 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) /// parse process functions defined by corresponding configurables if constexpr (requires { &T::process; }) { - AnalysisDataProcessorBuilder::inputsFromArgs(&T::process, "default", true, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); + AnalysisDataProcessorBuilder::inputsFromArgs(&T::process, "default", true, inputs, expressionInfos); } homogeneous_apply_refs( - overloaded{ - [name = name_str, &expressionInfos, &inputs, &bindingsKeys, &bindingsKeysUnsorted](framework::is_process_configurable auto& x) mutable { - // this pushes (argumentIndex,processHash,schemaPtr,nullptr) into expressionInfos for arguments that are Filtered/filtered_iterators - AnalysisDataProcessorBuilder::inputsFromArgs(x.process, (name + "/" + x.name).c_str(), x.value, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); - return true; - }, - [](auto&) { - return false; - }}, + [name = name_str, &expressionInfos, &inputs](auto& x) mutable { + // this pushes (argumentIndex, processHash, schemaPtr, nullptr) into expressionInfos for arguments that are 
Filtered/filtered_iterators + return AnalysisDataProcessorBuilder::requestInputsFromArgs(x, name, inputs, expressionInfos); + }, *task.get()); - // add preslice declarations to slicing cache definition - homogeneous_apply_refs([&bindingsKeys, &bindingsKeysUnsorted](auto& element) { return analysis_task_parsers::registerCache(element, bindingsKeys, bindingsKeysUnsorted); }, *task.get()); - // request base tables for spawnable extended tables and indices to be built // this checks for duplications homogeneous_apply_refs([&inputs](auto& element) { @@ -526,7 +551,12 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) requiredServices.insert(requiredServices.end(), arrowServices.begin(), arrowServices.end()); homogeneous_apply_refs([&requiredServices](auto& element) { return analysis_task_parsers::addService(requiredServices, element); }, *task.get()); - auto algo = AlgorithmSpec::InitCallback{[task = task, expressionInfos, bindingsKeys, bindingsKeysUnsorted](InitContext& ic) mutable { + auto algo = AlgorithmSpec::InitCallback{[task = task, expressionInfos](InitContext& ic) mutable { + Cache bindingsKeys; + Cache bindingsKeysUnsorted; + // add preslice declarations to slicing cache definition + homogeneous_apply_refs([&bindingsKeys, &bindingsKeysUnsorted](auto& element) { return analysis_task_parsers::registerCache(element, bindingsKeys, bindingsKeysUnsorted); }, *task.get()); + homogeneous_apply_refs([&ic](auto&& element) { return analysis_task_parsers::prepareOption(ic, element); }, *task.get()); homogeneous_apply_refs([&ic](auto&& element) { return analysis_task_parsers::prepareService(ic, element); }, *task.get()); @@ -556,6 +586,16 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) task->init(ic); } + /// parse process functions to enable requested grouping caches - note that at this state process configurables have their final values + if constexpr (requires { &T::process; }) { + AnalysisDataProcessorBuilder::cacheFromArgs(&T::process, true, bindingsKeys, bindingsKeysUnsorted); + } + homogeneous_apply_refs( + [&bindingsKeys, &bindingsKeysUnsorted](auto& x) mutable { + return AnalysisDataProcessorBuilder::requestCacheFromArgs(x, bindingsKeys, bindingsKeysUnsorted); + }, + *task.get()); + ic.services().get().setCaches(std::move(bindingsKeys)); ic.services().get().setCachesUnsorted(std::move(bindingsKeysUnsorted)); // initialize global caches diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 2edc23a63ce76..292a67023fc5e 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -34,51 +34,64 @@ struct SliceInfoUnsortedPtr { gsl::span getSliceFor(int value) const; }; -using StringPair = std::pair; +struct Entry { + std::string binding; + std::string key; + bool enabled; + + Entry(std::string b, std::string k, bool e = true) + : binding{b}, + key{k}, + enabled{e} + { + } +}; + +using Cache = std::vector; -void updatePairList(std::vector& list, std::string const& binding, std::string const& key); +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled); struct ArrowTableSlicingCacheDef { constexpr static ServiceKind service_kind = ServiceKind::Global; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; + Cache bindingsKeys; + Cache bindingsKeysUnsorted; - void setCaches(std::vector&& bsks); - void setCachesUnsorted(std::vector&& bsks); + void setCaches(Cache&& bsks); + void setCachesUnsorted(Cache&& bsks); }; struct ArrowTableSlicingCache { constexpr static ServiceKind service_kind = ServiceKind::Stream; - std::vector 
bindingsKeys; + Cache bindingsKeys; std::vector>> values; std::vector>> counts; - std::vector bindingsKeysUnsorted; + Cache bindingsKeysUnsorted; std::vector> valuesUnsorted; std::vector groups; - ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted = {}); + ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted = {}); // set caching information externally - void setCaches(std::vector&& bsks, std::vector&& bsksUnsorted = {}); + void setCaches(Cache&& bsks, Cache&& bsksUnsorted = {}); // update slicing info cache entry (assumes it is already present) arrow::Status updateCacheEntry(int pos, std::shared_ptr const& table); arrow::Status updateCacheEntryUnsorted(int pos, std::shared_ptr const& table); // helper to locate cache position - std::pair getCachePos(StringPair const& bindingKey) const; - int getCachePosSortedFor(StringPair const& bindingKey) const; - int getCachePosUnsortedFor(StringPair const& bindingKey) const; + std::pair getCachePos(Entry const& bindingKey) const; + int getCachePosSortedFor(Entry const& bindingKey) const; + int getCachePosUnsortedFor(Entry const& bindingKey) const; // get slice from cache for a given value - SliceInfoPtr getCacheFor(StringPair const& bindingKey) const; - SliceInfoUnsortedPtr getCacheUnsortedFor(StringPair const& bindingKey) const; + SliceInfoPtr getCacheFor(Entry const& bindingKey) const; + SliceInfoUnsortedPtr getCacheUnsortedFor(Entry const& bindingKey) const; SliceInfoPtr getCacheForPos(int pos) const; SliceInfoUnsortedPtr getCacheUnsortedForPos(int pos) const; - static void validateOrder(StringPair const& bindingKey, std::shared_ptr const& input); + static void validateOrder(Entry const& bindingKey, std::shared_ptr const& input); }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index 64b1d863c59e6..b8436314b057e 100644 --- a/Framework/Core/include/Framework/GroupSlicer.h +++ 
b/Framework/Core/include/Framework/GroupSlicer.h @@ -55,7 +55,7 @@ struct GroupSlicer { { constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); - auto bk = std::make_pair(binding, mIndexColumnName); + auto bk = Entry(binding, mIndexColumnName); if constexpr (!o2::soa::is_smallgroups>) { if (table.size() == 0) { return; diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index bd6ca551d24ec..3a681ee931a2b 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -194,7 +194,7 @@ bool PreslicePolicyBase::isMissing() const return binding == "[MISSING]"; } -StringPair const& PreslicePolicyBase::getBindingKey() const +Entry const& PreslicePolicyBase::getBindingKey() const { return bindingKey; } diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 12a4c7131e828..3b13e30581f70 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -567,26 +567,27 @@ o2::framework::ServiceSpec ArrowSupport::arrowTableSlicingCacheSpec() .name = "arrow-slicing-cache", .uniqueId = CommonServices::simpleServiceId(), .init = [](ServiceRegistryRef services, DeviceState&, fair::mq::ProgOptions&) { return ServiceHandle{TypeIdHelpers::uniqueId(), - new ArrowTableSlicingCache(std::vector>{services.get().bindingsKeys}, std::vector{services.get().bindingsKeysUnsorted}), + new ArrowTableSlicingCache(Cache{services.get().bindingsKeys}, + Cache{services.get().bindingsKeysUnsorted}), ServiceKind::Stream, typeid(ArrowTableSlicingCache).name()}; }, .configure = CommonServices::noConfiguration(), .preProcessing = [](ProcessingContext& pc, void* service_ptr) { auto* service = static_cast(service_ptr); auto& caches = service->bindingsKeys; - for (auto i = 0; i < caches.size(); ++i) { - if (pc.inputs().getPos(caches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntry(i, 
pc.inputs().get(caches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < caches.size(); ++i) { + if (caches[i].enabled && pc.inputs().getPos(caches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].first.c_str(), caches[i].second.c_str()); + throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].binding.c_str(), caches[i].key.c_str()); } } } auto& unsortedCaches = service->bindingsKeysUnsorted; - for (auto i = 0; i < unsortedCaches.size(); ++i) { - if (pc.inputs().getPos(unsortedCaches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < unsortedCaches.size(); ++i) { + if (unsortedCaches[i].enabled && pc.inputs().getPos(unsortedCaches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].first.c_str(), unsortedCaches[i].second.c_str()); + throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].binding.c_str(), unsortedCaches[i].key.c_str()); } } } }, diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 4b31f96e32fba..e001e293c4733 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -19,10 +19,13 @@ namespace o2::framework { -void updatePairList(std::vector& list, std::string const& binding, std::string const& key) +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled = true) { - if (std::find_if(list.begin(), list.end(), [&binding, 
&key](auto const& entry) { return (entry.first == binding) && (entry.second == key); }) == list.end()) { - list.emplace_back(binding, key); + auto locate = std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.binding == binding) && (entry.key == key); }); + if (locate == list.end()) { + list.emplace_back(binding, key, enabled); + } else if (!locate->enabled && enabled) { + locate->enabled = true; } } @@ -65,17 +68,17 @@ gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const return {(*groups)[value].data(), (*groups)[value].size()}; } -void ArrowTableSlicingCacheDef::setCaches(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCaches(Cache&& bsks) { bindingsKeys = bsks; } -void ArrowTableSlicingCacheDef::setCachesUnsorted(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCachesUnsorted(Cache&& bsks) { bindingsKeysUnsorted = bsks; } -ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted) +ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted) : bindingsKeys{bsks}, bindingsKeysUnsorted{bsksUnsorted} { @@ -86,7 +89,7 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, s groups.resize(bindingsKeysUnsorted.size()); } -void ArrowTableSlicingCache::setCaches(std::vector&& bsks, std::vector&& bsksUnsorted) +void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) { bindingsKeys = bsks; bindingsKeysUnsorted = bsksUnsorted; @@ -107,11 +110,15 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< counts[pos].reset(); return arrow::Status::OK(); } + auto& [b, k, e] = bindingsKeys[pos]; + if (!e) { + throw runtime_error_f("Disabled cache %s/%s update requested", b.c_str(), k.c_str()); + } validateOrder(bindingsKeys[pos], table); arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, - 
arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].second)}, + arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].key)}, &options)); auto pair = static_cast(value_counts.array()); values[pos].reset(); @@ -128,7 +135,10 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (table->num_rows() == 0) { return arrow::Status::OK(); } - auto& [b, k] = bindingsKeysUnsorted[pos]; + auto& [b, k, e] = bindingsKeysUnsorted[pos]; + if (!e) { + throw runtime_error_f("Disabled unsorted cache %s/%s update requested", b.c_str(), k.c_str()); + } auto column = table->GetColumnByName(k); auto row = 0; for (auto iChunk = 0; iChunk < column->num_chunks(); ++iChunk) { @@ -139,7 +149,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (std::find(valuesUnsorted[pos].begin(), valuesUnsorted[pos].end(), v) == valuesUnsorted[pos].end()) { valuesUnsorted[pos].push_back(v); } - if (groups[pos].size() <= v) { + if ((int)groups[pos].size() <= v) { groups[pos].resize(v + 1); } (groups[pos])[v].push_back(row); @@ -151,7 +161,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st return arrow::Status::OK(); } -std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindingKey) const +std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey) const { auto pos = getCachePosSortedFor(bindingKey); if (pos != -1) { @@ -161,41 +171,47 @@ std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindi if (pos != -1) { return {pos, false}; } - throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } -int ArrowTableSlicingCache::getCachePosSortedFor(StringPair const& bindingKey) const +int 
ArrowTableSlicingCache::getCachePosSortedFor(Entry const& bindingKey) const { - auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate != bindingsKeys.end()) { return std::distance(bindingsKeys.begin(), locate); } return -1; } -int ArrowTableSlicingCache::getCachePosUnsortedFor(StringPair const& bindingKey) const +int ArrowTableSlicingCache::getCachePosUnsortedFor(Entry const& bindingKey) const { - auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate_unsorted != bindingsKeysUnsorted.end()) { return std::distance(bindingsKeysUnsorted.begin(), locate_unsorted); } return -1; } -SliceInfoPtr ArrowTableSlicingCache::getCacheFor(StringPair const& bindingKey) const +SliceInfoPtr ArrowTableSlicingCache::getCacheFor(Entry const& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (!s) { - throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + } + if (!bindingsKeys[p].enabled) { + throw runtime_error_f("Disabled cache %s/%s is requested", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheForPos(p); } -SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const StringPair& bindingKey) const +SliceInfoUnsortedPtr 
ArrowTableSlicingCache::getCacheUnsortedFor(const Entry& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (s) { - throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + } + if (!bindingsKeysUnsorted[p].enabled) { + throw runtime_error_f("Disabled unsorted cache %s/%s is requested", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheUnsortedForPos(p); @@ -224,9 +240,9 @@ SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedForPos(int pos) con }; } -void ArrowTableSlicingCache::validateOrder(StringPair const& bindingKey, const std::shared_ptr& input) +void ArrowTableSlicingCache::validateOrder(Entry const& bindingKey, const std::shared_ptr& input) { - auto const& [target, key] = bindingKey; + auto const& [target, key, enabled] = bindingKey; auto column = input->GetColumnByName(key); auto array0 = static_cast>(column->chunk(0)->data()); int32_t prev = 0; diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 161939141e790..091c21eeae229 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -683,7 +683,7 @@ TEST_CASE("ArrowDirectSlicing") std::vector slices; std::vector offsts; - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); auto s = cache.updateCacheEntry(0, {evtTable}); auto lcache = cache.getCacheFor(bk); @@ -741,7 +741,7 @@ TEST_CASE("TestSlicingException") } auto evtTable = builderE.finalize(); - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); try { From 0c5140edf08d83042e2b8362eb152db6e01e3177 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Mon, 19 May 2025 
12:48:54 +0200 Subject: [PATCH 0378/1764] NN clustering: VRAM memory leak fix + (u)int -> (u)int32_t (#14272) * VRAM memory leak fix + (u)int -> (u)int32_t * Please consider the following formatting changes * Fixing my own debug messages * Making shared pointer for releasing * Bug-fix * Adding Davids patch --------- Co-authored-by: ALICE Action Bot --- Common/ML/include/ML/OrtInterface.h | 14 +-- Common/ML/src/OrtInterface.cxx | 42 ++++--- .../Global/GPUChainTrackingClusterizer.cxx | 33 ++++-- .../GPUTPCNNClusterizerHost.cxx | 23 ++-- .../GPUTPCNNClusterizerHost.h | 2 +- .../GPUTPCNNClusterizerKernels.cxx | 108 +++++++++--------- .../GPUTPCNNClusterizerKernels.h | 7 +- 7 files changed, 128 insertions(+), 101 deletions(-) diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index b4f40f3f5c694..7224645425856 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -45,14 +45,10 @@ class OrtModel public: // Constructors & destructors - OrtModel() = default; - OrtModel(std::unordered_map optionsMap) { init(optionsMap); } - void init(std::unordered_map optionsMap) - { - initOptions(optionsMap); - initEnvironment(); - } - virtual ~OrtModel() = default; + OrtModel(); + OrtModel(std::unordered_map optionsMap); + void init(std::unordered_map optionsMap); + virtual ~OrtModel(); // General purpose void initOptions(std::unordered_map optionsMap); @@ -113,7 +109,7 @@ class OrtModel private: // ORT variables -> need to be hidden as pImpl struct OrtVariables; - OrtVariables* mPImplOrt; + std::unique_ptr mPImplOrt; // Input & Output specifications of the loaded network std::vector mInputNamesChar, mOutputNamesChar; diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index df7f0a2deba82..8f31761489997 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -27,11 +27,20 @@ namespace o2 namespace ml { +OrtModel::OrtModel() = default; 
+OrtModel::OrtModel(std::unordered_map optionsMap) { init(optionsMap); } +OrtModel::~OrtModel() = default; +void OrtModel::init(std::unordered_map optionsMap) +{ + initOptions(optionsMap); + initEnvironment(); +} + struct OrtModel::OrtVariables { // The actual implementation is hidden in the .cxx file // ORT runtime objects Ort::RunOptions runOptions; - std::shared_ptr env = nullptr; - std::shared_ptr session = nullptr; ///< ONNX session + std::unique_ptr env = nullptr; + std::unique_ptr session = nullptr; ///< ONNX session Ort::SessionOptions sessionOptions; Ort::AllocatorWithDefaultOptions allocator; Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault); @@ -41,7 +50,7 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c // General purpose void OrtModel::initOptions(std::unordered_map optionsMap) { - mPImplOrt = new OrtVariables(); + mPImplOrt = std::make_unique(); // Load from options map if (!optionsMap.contains("model-path")) { @@ -101,7 +110,7 @@ void OrtModel::initOptions(std::unordered_map optionsM void OrtModel::initEnvironment() { - mPImplOrt->env = std::make_shared( + mPImplOrt->env = std::make_unique( OrtLoggingLevel(mLoggingLevel), (mEnvName.empty() ? 
"ORT" : mEnvName.c_str()), // Integrate ORT logging into Fairlogger @@ -129,7 +138,7 @@ void OrtModel::initSession() if (mAllocateDeviceMemory) { memoryOnDevice(mDeviceId); } - mPImplOrt->session = std::make_shared(*mPImplOrt->env, mModelPath.c_str(), mPImplOrt->sessionOptions); + mPImplOrt->session = std::make_unique(*mPImplOrt->env, mModelPath.c_str(), mPImplOrt->sessionOptions); mPImplOrt->ioBinding = std::make_unique(*mPImplOrt->session); setIO(); @@ -147,12 +156,12 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) (mPImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time (mPImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time // Arena memory shrinkage comes at performance cost - /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; - // (mPImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 + // For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; + (mPImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See 
kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 std::string dev_mem_str = ""; if (mDeviceType == "ROCM") { - dev_mem_str = "Hip"; + dev_mem_str = "HipPinned"; } if (mDeviceType == "CUDA") { dev_mem_str = "Cuda"; @@ -166,7 +175,7 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) void OrtModel::resetSession() { - mPImplOrt->session = std::make_shared(*(mPImplOrt->env), mModelPath.c_str(), mPImplOrt->sessionOptions); + mPImplOrt->session = std::make_unique(*(mPImplOrt->env), mModelPath.c_str(), mPImplOrt->sessionOptions); } // Getters @@ -252,7 +261,7 @@ void OrtModel::setIO() void OrtModel::setEnv(Ort::Env* env) { - mPImplOrt->env = std::shared_ptr(env); + mPImplOrt->env.reset(env); } // Inference @@ -308,6 +317,14 @@ void OrtModel::inference(I* input, int64_t input_size, O* output) (mPImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); (mPImplOrt->session)->Run(mPImplOrt->runOptions, *mPImplOrt->ioBinding); + // mPImplOrt->session->Run( + // mPImplOrt->runOptions, + // mInputNamesChar.data(), + // &inputTensor, + // mInputNamesChar.size(), + // mOutputNamesChar.data(), + // &outputTensor, + // mOutputNamesChar.size()); } template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, OrtDataType::Float16_t*); @@ -427,10 +444,7 @@ template std::vector OrtModel::inferencesession->EndProfiling(); - // } - LOG(info) << "(ORT) Size of mPImplOrt: " << sizeof(*mPImplOrt) << " bytes"; + mPImplOrt.reset(); } // private diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 2cdd1bb76bf00..6c4e60a6025e1 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -659,7 +659,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) 
// But environment must be valid, so we init the model environment first and use it here afterwards. // Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator. // TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); + // if (lane == 0) { + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); + // } // recreateMemoryAllocator = true; (nnApplications[lane].mModelClass).initSession(); } @@ -671,7 +673,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } // (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv()); (nnApplications[lane].mModelReg1).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator); (nnApplications[lane].mModelReg1).initSession(); } if (nnApplications[lane].mModelsUsed[2]) { @@ -680,8 +682,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (nnApplications[lane].mModelReg2.getIntraOpNumThreads() > maxThreads) { nnApplications[lane].mModelReg2.setIntraOpNumThreads(maxThreads); } + // (nnApplications[lane].mModelReg2).setEnv((nnApplications[lane].mModelClass).getEnv()); (nnApplications[lane].mModelReg2).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 
recreateMemoryAllocator); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); (nnApplications[lane].mModelReg2).initSession(); } if (nn_settings.nnClusterizerVerbosity < 3) { @@ -707,8 +710,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (doGPU) { WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); } - LOG(info) << "Size of nnApplications[lane]: " << sizeof(nnApplications[0]) << " bytes"; - LOG(info) << "Size of nnApplications: " << sizeof(GPUTPCNNClusterizerHost) * GetProcessingSettings().nTPCClustererLanes << " bytes"; } #endif @@ -976,6 +977,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN; GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane]; + // // bool recreateMemoryAllocator = false; + // if (lane == 0) { + // (nnApplications[lane].mModelClass).initEnvironment(); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 0); + // } + // // recreateMemoryAllocator = true; + // (nnApplications[lane].mModelClass).initSession(); + // (nnApplications[lane].mModelReg1).initSession(); + int withMC = (doGPU && propagateMCLabels); if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { @@ -1188,12 +1198,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { - // if (GetProcessingSettings().nn.applyNNclusterizer) { - // GPUTPCNNClusterizerHost& nnApplication = 
nnApplications[i]; - // nnApplication.mModelClass.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.mModelReg1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.mModelReg2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // } + if (GetProcessingSettings().nn.applyNNclusterizer) { + LOG(info) << "(ORT) Environment releasing..."; + GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; + nnApplication.mModelClass.release(true); + nnApplication.mModelReg1.release(true); + nnApplication.mModelReg2.release(true); + } if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index ca2deec60601c..90f1d6e27246f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -136,8 +136,8 @@ struct MockedOrtAllocator : OrtAllocator { std::atomic memory_inuse{0}; std::atomic num_allocations{0}; std::atomic num_reserve_allocations{0}; - OrtMemoryInfo* memory_info; - GPUReconstruction* rec; + OrtMemoryInfo* mMemoryInfoInternal; + GPUReconstruction* mRecInternal; }; MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info) @@ -147,37 +147,36 @@ MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast(this_)->Free(p); }; OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast(this_)->Info(); }; OrtAllocator::Reserve = [](OrtAllocator* this_, size_t size) { return static_cast(this_)->Reserve(size); }; - rec = r; - memory_info = info; + mRecInternal = r; + mMemoryInfoInternal = info; } MockedOrtAllocator::~MockedOrtAllocator() { - // Ort::GetApi().ReleaseMemoryInfo(memory_info); + // Ort::GetApi().ReleaseMemoryInfo(mMemoryInfoInternal); (void)0; // 
Suppress warning for empty destructor } void* MockedOrtAllocator::Alloc(size_t size) { - // LOG(info) << "(ORT) Allocating volatile memory of size " << size << " bytes"; - return rec->AllocateVolatileDeviceMemory(size); + LOG(info) << "(ORT) Allocating direct memory of size " << size << " bytes"; + return mRecInternal->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK); } void* MockedOrtAllocator::Reserve(size_t size) { - // LOG(info) << "(ORT) Reserving volatile memory of size " << size << " bytes"; - return rec->AllocateVolatileDeviceMemory(size); + LOG(info) << "(ORT) Reserving direct memory of size " << size << " bytes"; + return mRecInternal->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK); } void MockedOrtAllocator::Free(void* p) { // LOG(info) << "(ORT) Freeing volatile memory " << p; - rec->ReturnVolatileDeviceMemory(); } const OrtMemoryInfo* MockedOrtAllocator::Info() const { - return memory_info; + return mMemoryInfoInternal; } size_t MockedOrtAllocator::NumAllocations() const @@ -197,7 +196,7 @@ void MockedOrtAllocator::LeakCheck() } } -void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) +void GPUTPCNNClusterizerHost::directOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) { mMockedAlloc = std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); if (recreate) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h index e659753f21d7d..4334c3418eb09 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -53,7 +53,7 @@ class GPUTPCNNClusterizerHost void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); // ONNX - void volatileOrtAllocator(Ort::Env*, Ort::MemoryInfo*, 
GPUReconstruction*, bool = false); + void directOrtAllocator(Ort::Env*, Ort::MemoryInfo*, GPUReconstruction*, bool = false); MockedOrtAllocator* getMockedAllocator(); const OrtMemoryInfo* getMockedMemoryInfo(); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 47bc5e8da80ca..8ca61602ab4e9 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -35,9 +35,9 @@ using namespace o2::gpu::tpccf; // Defining individual thread functions for data filling, determining the class label and running the CF clusterizer template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; if (clustererNN.mOutputDataClass[glo_idx] == 0) { // default clusterizer should not be called in batched mode due to mess-up with thread indices @@ -51,29 +51,29 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t 
dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; - uint write_idx = glo_idx * clustererNN.mNnClusterizerElementSize; // Potential optimization: Either choose mNnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId + uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize; // Potential optimization: Either choose mNnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; - int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; + int32_t row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); + int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); #ifndef GPUCA_GPUCODE GPUCA_UNROLL(U(), U()); #endif - for (int r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) { + for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) { bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); - int pad_offset = is_row_boundary ? 
0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); - for (int p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) { + int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); + for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) { bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); - for (int t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) { + for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) { if (!is_boundary) { CfChargePos tmp_pos(row + r, pad + p, time + t); if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization @@ -111,21 +111,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; - uint base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize); - uint transient_index = glo_idx % clustererNN.mNnClusterizerElementSize; + uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize); 
+ uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize); CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); - CfChargePos peak = clusterer.mPfilteredPeakPositions[base_idx + batchStart]; - int row = static_cast(peak.row()), pad = static_cast(peak.pad()); + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; + int32_t row = static_cast(peak.row()), pad = static_cast(peak.pad()); if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) { - uint top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize; + uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; CfChargePos tmp_pos = peak.delta(d); @@ -142,8 +142,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(pad) / GPUTPCGeometry::NPads(row); } } else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) { - int time = static_cast(peak.time()); - int r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; + int32_t time = static_cast(peak.time()); + int32_t r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); if (is_row_boundary) { if (dtype == 0) { @@ -152,15 +152,16 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(clustererNN.mNnClusterizerBoundaryFillValue); } } else { - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); - int pad_offset = 
GPUTPCNNClusterizerKernels::padOffset(row, row + r); - int rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); - int p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset; - bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); + int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); + int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r); + int32_t rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); + int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset; + int32_t t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; + + bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) && (t < 0 || t >= TPC_MAX_FRAGMENT_LEN_GPU); if (!is_boundary) { float central_charge = static_cast(chargeMap[peak].unpack()); - int t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; CfChargePos tmp_pos(row + r, pad + p, time + t); if (dtype == 0) { clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); @@ -179,9 +180,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void 
GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); if (dtype == 0) { processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)((processors.tpcNNClusterer[sector].mModelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].mNnClassThreshold); } else if (dtype == 1) { @@ -190,14 +191,14 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { auto& clustererNN = processors.tpcNNClusterer[sector]; - uint glo_idx = get_global_id(0); - uint elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes; + uint32_t glo_idx = get_global_id(0); + uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes; float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty] - uint class_label = 0; - for (uint pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) { + uint32_t class_label = 0; + for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) { if (pIdx == elem_iterator) { if (dtype == 0) { current_max_prob = static_cast(clustererNN.mModelProbabilities_16[pIdx]); @@ -212,7 +213,7 @@ GPUdii() void 
GPUTPCNNClusterizerKernels::Thread 1) { clustererNN.mClusterFlags[2 * glo_idx] = 1; @@ -221,25 +222,30 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; + uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters; + uint32_t full_glo_idx = glo_idx + batchStart; + if (full_glo_idx >= maxClusterNum) { + return; + } + int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)]; float central_charge = static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; - uint full_glo_idx = glo_idx + batchStart; - int model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes; // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.mNnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size(); - if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes == -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) { + if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) { ClusterAccumulator pc; @@ -291,7 +297,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; float central_charge = static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; - uint full_glo_idx = glo_idx + batchStart; - int model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes; + uint32_t full_glo_idx = glo_idx + batchStart; + uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes; if (clustererNN.mOutputDataClass[full_glo_idx] > 0) { @@ -384,7 +390,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread 62 ? global_shift : 0); } -GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t global_shift) { if (pad < 0 || row < 0) { // Faster short-circuit return true; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index dc7f537c6c1e8..dac2bf9554849 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -73,11 +73,12 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate private: static GPUd() void fillInputData(int32_t, int32_t, int32_t, int32_t, processorType&, uint8_t, int8_t, uint); static GPUd() void publishClustersReg1(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); + static GPUd() uint32_t sortIntoBuckets(GPUTPCClusterFinder&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t); static GPUd() void publishClustersReg2(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); - static GPUd() int padOffset(int, int); - static GPUd() int rowOffset(int, int); - static GPUd() bool isBoundary(int, int, int); + static GPUd() int32_t padOffset(int32_t, int32_t); + static GPUd() int32_t rowOffset(int32_t, int32_t); + static GPUd() bool isBoundary(int32_t, int32_t, int32_t); }; } // namespace o2::gpu From e7d217af61b67a12b508451c2f2ea925fda92f6e Mon Sep 17 00:00:00 2001 From: 
Marvin Hemmer <53471402+mhemmer-cern@users.noreply.github.com> Date: Mon, 19 May 2025 14:29:21 +0200 Subject: [PATCH 0379/1764] [EMCAL-688] EMCAL: Add fCross to the AnalysisCluster (#14282) - Add fCross the exoticity parameter to the AnalysisCluster so we can easily access it later in the emcalCorrectionTask in O2Physics --- .../EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h | 8 ++++++-- .../EMCAL/base/include/EMCALBase/ClusterFactory.h | 5 +++-- Detectors/EMCAL/base/src/ClusterFactory.cxx | 12 ++++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h b/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h index 758e0a1fa0b47..e19fd17dea2ce 100644 --- a/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h +++ b/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h @@ -182,6 +182,9 @@ class AnalysisCluster float getCoreEnergy() const { return mCoreEnergy; } void setCoreEnergy(float energy) { mCoreEnergy = energy; } + float getFCross() const { return mFCross; } + void setFCross(float fCross) { mFCross = fCross; } + /// /// Returns TLorentzVector with momentum of the cluster. 
Only valid for clusters /// identified as photons or pi0 (overlapped gamma) produced on the vertex @@ -223,12 +226,13 @@ class AnalysisCluster float mTime = 0.; ///< Time of the digit/cell with maximal energy deposition bool mIsExotic = false; //!::buildCluster(int clusterIn float exoticTime = mInputsContainer[inputIndMax].getTimeStamp(); + float fCross = 0.; + try { - clusterAnalysis.setIsExotic(isExoticCell(towerId, inputEnergyMax, exoticTime)); + clusterAnalysis.setIsExotic(isExoticCell(towerId, inputEnergyMax, exoticTime, fCross)); + clusterAnalysis.setFCross(fCross); } catch (UninitLookUpTableException& e) { LOG(error) << e.what(); } @@ -253,7 +256,7 @@ void ClusterFactory::evalLocalPosition(gsl::span inputsInd clRmsXYZ[i] += (w * xyzi[i] * xyzi[i]); } } // w > 0 - } // dig loop + } // dig loop // cout << " wtot " << wtot << endl; @@ -600,7 +603,7 @@ std::tuple ClusterFactory::getMaximalEnergyI /// Look to cell neighbourhood and reject if it seems exotic //____________________________________________________________________________ template -bool ClusterFactory::isExoticCell(short towerId, float ecell, float const exoticTime) const +bool ClusterFactory::isExoticCell(short towerId, float ecell, float const exoticTime, float& fCross) const { if (ecell < mExoticCellMinAmplitude) { return false; // do not reject low energy cells @@ -612,8 +615,9 @@ bool ClusterFactory::isExoticCell(short towerId, float ecell, float c } float eCross = getECross(towerId, ecell, exoticTime); + fCross = 1.f - eCross / ecell; - if (1 - eCross / ecell > mExoticCellFraction) { + if (fCross > mExoticCellFraction) { LOG(debug) << "EXOTIC CELL id " << towerId << ", eCell " << ecell << ", eCross " << eCross << ", 1-eCross/eCell " << 1 - eCross / ecell; return true; } From 762cef75f0f7ce4c23c81e78efb439c72bdc7970 Mon Sep 17 00:00:00 2001 From: Marvin Hemmer <53471402+mhemmer-cern@users.noreply.github.com> Date: Mon, 19 May 2025 14:30:02 +0200 Subject: [PATCH 0380/1764] [EMCAL-689] 
EMCAL/Geometry: Add function to load alignment matrix from CCDB (#14237) - Add function `SetMisalMatrixFromCcdb` to set the missalignment matrices for the EMCal via the CCDB. This way they are not laoded from the GeoManager. The function expects a path inside the ccdb, which by default is set to `"Users/m/mhemmer/EMCAL/Config/GeometryAligned"` and the timestamp. Currently in the CCDB we only have the old Run 2 alignment object. Once we have the new alignment objects we can uplaod them and via the correct time stamp load them fittingly. --- .../EMCAL/base/include/EMCALBase/Geometry.h | 9 +++++- Detectors/EMCAL/base/src/Geometry.cxx | 31 +++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/Detectors/EMCAL/base/include/EMCALBase/Geometry.h b/Detectors/EMCAL/base/include/EMCALBase/Geometry.h index 04dcaa3b802de..4d4a947de88ca 100644 --- a/Detectors/EMCAL/base/include/EMCALBase/Geometry.h +++ b/Detectors/EMCAL/base/include/EMCALBase/Geometry.h @@ -22,7 +22,9 @@ #include #include #include +#include +#include "CCDB/BasicCCDBManager.h" #include "DataFormatsEMCAL/Constants.h" #include "EMCALBase/GeometryBase.h" #include "MathUtils/Cartesian.h" @@ -57,7 +59,7 @@ class Geometry /// | EMCAL_COMPLETE12SMV1_DCAL | Full EMCAL, 10 DCAL Supermodules (not used in practice) | /// | EMCAL_COMPLETE12SMV1_DCAL_8SM | Full EMCAL, 8 DCAL Supermodules (run2) | /// | EMCAL_COMPLETE12SMV1_DCAL_DEV | Full EMCAL, DCAL development geometry (not used) | - Geometry(const std::string_view name, const std::string_view mcname = "", const std::string_view mctitle = ""); + explicit Geometry(const std::string_view name, const std::string_view mcname = "", const std::string_view mctitle = ""); /// \brief Copy constructor. 
Geometry(const Geometry& geom); @@ -564,6 +566,11 @@ class Geometry /// void SetMisalMatrix(const TGeoHMatrix* m, Int_t smod) const; + /// + /// Method to set shift-rotational matrixes from CCDB + /// + void SetMisalMatrixFromCcdb(const char* path = "Users/m/mhemmer/EMCAL/Config/GeometryAligned", int timestamp = 10000) const; + /// /// Transform clusters cell position into global with alternative method, taking into account the depth calculation. /// Input are: diff --git a/Detectors/EMCAL/base/src/Geometry.cxx b/Detectors/EMCAL/base/src/Geometry.cxx index 920dc24823e83..6eff6c161f2a1 100644 --- a/Detectors/EMCAL/base/src/Geometry.cxx +++ b/Detectors/EMCAL/base/src/Geometry.cxx @@ -8,16 +8,21 @@ // In applying this license CERN does not waive the privileges and immunities // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. +#include "EMCALBase/Geometry.h" + +#include + #include +#include +#include +#include +#include #include #include #include #include -#include - -#include "EMCALBase/Geometry.h" #include "EMCALBase/ShishKebabTrd1Module.h" #include @@ -1557,6 +1562,7 @@ const TGeoHMatrix* Geometry::GetMatrixForSuperModule(Int_t smod) const if (!SMODULEMATRIX[smod]) { if (gGeoManager) { + LOG(info) << "Loading EMCAL misalignment matrix for SM " << smod << " from GeoManager."; SetMisalMatrix(GetMatrixForSuperModuleFromGeoManager(smod), smod); } else { LOG(fatal) << "Cannot find EMCAL misalignment matrices! 
Recover them either: \n" @@ -1762,6 +1768,25 @@ void Geometry::SetMisalMatrix(const TGeoHMatrix* m, Int_t smod) const } } +void Geometry::SetMisalMatrixFromCcdb(const char* path, int timestamp) const +{ + LOG(info) << "Using CCDB to obtain EMCal alignment."; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + api.init("http://alice-ccdb.cern.ch"); + TObjArray* matrices = api.retrieveFromTFileAny(path, metadata, timestamp); + + for (int iSM = 0; iSM < mNumberOfSuperModules; ++iSM) { + TGeoHMatrix* mat = reinterpret_cast(matrices->At(iSM)); + if (mat) { + + SetMisalMatrix(mat, iSM); + } else { + LOG(info) << "Could not obtain Alignment Matrix for SM " << iSM; + } + } +} + Bool_t Geometry::IsDCALSM(Int_t iSupMod) const { if (mEMCSMSystem[iSupMod] == DCAL_STANDARD || mEMCSMSystem[iSupMod] == DCAL_EXT) { From e2f0fa3f1da3ecba8e1d446de4fd456fdd61b359 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Mon, 19 May 2025 18:55:40 +0200 Subject: [PATCH 0381/1764] fix: suppressing excesive output from raw decoder (#14291) * fix: suppressing excesive output from raw decoder * clang * fix * fix --- .../include/DataFormatsCTP/Configuration.h | 2 +- .../Detectors/CTP/src/Configuration.cxx | 12 ++++- .../CTPReconstruction/RawDataDecoder.h | 10 ++-- .../CTP/reconstruction/src/RawDataDecoder.cxx | 32 ++++++------ .../include/CTPWorkflow/RawDecoderSpec.h | 11 ++++ Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 50 +++++++++++++------ 6 files changed, 79 insertions(+), 38 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h index fdd73986f1eaf..e9464089d71fc 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h @@ -160,7 +160,7 @@ class CTPConfiguration const std::vector& getCTPClasses() const { return mCTPClasses; } // Read-only interface uint64_t getInputMask(const std::string& 
name) const; int getInputIndex(const std::string& name) const; - std::string getClassNameFromIndex(int index) { return mCTPClasses[index].name; }; + std::string getClassNameFromIndex(int index); std::string getClassNameFromHWIndex(int index); const CTPClass* getCTPClassFromHWIndex(const int index) const; bool isMaskInInputs(const uint64_t& mask) const; diff --git a/DataFormats/Detectors/CTP/src/Configuration.cxx b/DataFormats/Detectors/CTP/src/Configuration.cxx index 38a49132db3d1..61e51bcb20d91 100644 --- a/DataFormats/Detectors/CTP/src/Configuration.cxx +++ b/DataFormats/Detectors/CTP/src/Configuration.cxx @@ -780,6 +780,15 @@ int CTPConfiguration::getInputIndex(const std::string& name) const LOG(info) << "input:" << name << " index:" << index; return index; } +std::string CTPConfiguration::getClassNameFromIndex(int index) +{ + if (index < (int)mCTPClasses.size()) { + return mCTPClasses[index].name; + } else { + std::string name = "Cls" + std::to_string(index); + return name; + } +}; std::string CTPConfiguration::getClassNameFromHWIndex(int index) { for (auto& cls : mCTPClasses) { @@ -924,8 +933,9 @@ uint64_t CTPConfiguration::getTriggerClassMaskWInputsNoTrgDets() const exclude += cls.name.find("EMC") != std::string::npos; exclude += cls.name.find("TRD") != std::string::npos; exclude += cls.name.find("HMP") != std::string::npos; - if (!exclude) + if (!exclude) { clsmask |= cls.classMask; + } } return clsmask; } diff --git a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h index 8ebc7e0304561..53addf32c538f 100644 --- a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h +++ b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h @@ -60,7 +60,8 @@ class RawDataDecoder std::array getClassErrorsB() { return mClassErrorsB; } std::array getClassCountersA() { return mClassCountersA; } std::array getClassCountersB() { return 
mClassCountersB; } - int getLostDueToShift() { return mLostDueToShift; } + int getLostDueToShiftCls() { return mLostDueToShiftCC; } + int getLostDueToShiftInp() { return mLostDueToShiftInps; } private: static constexpr uint32_t TF_TRIGGERTYPE_MASK = 0x800; @@ -80,8 +81,8 @@ class RawDataDecoder gbtword80_t mTVXMask = 0x4; // TVX is 3rd input gbtword80_t mVBAMask = 0x20; // VBA is 6 th input bool mVerbose = false; - uint32_t mIRRejected = 0; - uint32_t mTCRRejected = 0; + int mIRRejected = 0; + int mTCRRejected = 0; bool mPadding = true; uint32_t mTFOrbit = 0; std::vector mTFOrbits; @@ -94,7 +95,8 @@ class RawDataDecoder std::array mClassErrorsB{}; // from inputs std::array mClassCountersA{}; std::array mClassCountersB{}; // from inputs - int mLostDueToShift = 0; + int mLostDueToShiftCC = 0; + int mLostDueToShiftInps = 0; CTPConfiguration mCTPConfig; }; } // namespace ctp diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index b216f5ec54570..a062a262acf62 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -296,12 +296,12 @@ int RawDataDecoder::decodeRaw(o2::framework::InputRecord& inputs, std::vector& digitsMap, o2::pmr::vector& digits, uint32_t TFOrbit, uint64_t trgclassmask) { // int nClasswoInp = 0; // counting classes without input which should never happen + int lost = 0; std::map digitsMapShifted; auto L0shift = o2::ctp::TriggerOffsetsParam::Instance().LM_L0; auto L1shift = L0shift + o2::ctp::TriggerOffsetsParam::Instance().L0_L1; @@ -551,7 +554,7 @@ int RawDataDecoder::shiftInputs(std::map& digit if (lut == 0 || lut == 1) { // no inps or LM digitsMapShifted[dig.first] = dig.second; } else if (lut == 2) { // L0 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); if (dig.second.CTPClassMask.count()) { // LOG(error) << 
"Adding class mask without input ?"; // This is not needed as it can happen; Full checj done below - see next LOG(error) @@ -559,30 +562,30 @@ int RawDataDecoder::shiftInputs(std::map& digit digitsMapShifted[dig.first] = digi; } } else if (lut == 4) { // L1 - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); if (dig.second.CTPClassMask.count()) { CTPDigit digi = {dig.first, 0, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } } else if (lut == 6) { // L0 and L1 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); if (dig.second.CTPClassMask.count()) { CTPDigit digi = {dig.first, 0, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } } else if (lut == 3) { // LM and L0 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); CTPDigit digi = {dig.first, inpmask & (~L0MASKInputs), dig.second.CTPClassMask}; // if LM level do not need to add class as LM is not shifted; digitsMapShifted[dig.first] = digi; } else if (lut == 5) { // LM and L1 - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); CTPDigit digi = {dig.first, inpmask & (~L1MASKInputs), dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } else if (lut == 7) { // LM and L0 and L1 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, 
L1shift, 1, digitsMapShifted); CTPDigit digi = {dig.first, inpmaskLM, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } else { @@ -592,7 +595,7 @@ int RawDataDecoder::shiftInputs(std::map& digit for (auto const& dig : digitsMapShifted) { digits.push_back(dig.second); } - return 0; + return lost; } // int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask, uint64_t trgclassmaskNoTrgDet) @@ -654,16 +657,13 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, mClassErrorsB[cls.getIndex()]++; ret = 256; } else { - mLostDueToShift++; + mLostDueToShiftCC++; } } } } } } - if (mLostDueToShift) { - LOG(debug) << "LOST classes because of shift:" << mLostDueToShift; - } return ret; } // diff --git a/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h b/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h index a5a1a75a0b594..3198e5c33e219 100644 --- a/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h +++ b/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h @@ -74,6 +74,17 @@ class RawDecoderSpec : public framework::Task std::deque mHistoryT; std::deque mHistoryV; RawDataDecoder mDecoder; + // Errors + int mLostDueToShiftInps = 0; + int mErrorIR = 0; + int mErrorTCR = 0; + int mIRRejected = 0; + int mTCRRejected = 0; + std::array mClsEA{}; + std::array mClsEB{}; // from inputs + std::array mClsA{}; + std::array mClsB{}; // from inputs + bool mCheckConsistency = false; }; /// \brief Creating DataProcessorSpec for the CTP diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 2df6bc981ce44..041e6cb472ebb 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -26,6 +26,8 @@ using namespace o2::ctp::reco_workflow; void RawDecoderSpec::init(framework::InitContext& ctx) { + mCheckConsistency = ctx.options().get("check-consistency"); + 
mDecoder.setCheckConsistency(mCheckConsistency); mDecodeinputs = ctx.options().get("ctpinputs-decoding"); mDecoder.setDecodeInps(mDecodeinputs); mNTFToIntegrate = ctx.options().get("ntf-to-average"); @@ -43,7 +45,7 @@ void RawDecoderSpec::init(framework::InitContext& ctx) mOutputLumiInfo.inp2 = inp2; mMaxInputSize = ctx.options().get("max-input-size"); mMaxInputSizeFatal = ctx.options().get("max-input-size-fatal"); - LOG(info) << "CTP reco init done. Inputs decoding here:" << mDecodeinputs << " DoLumi:" << mDoLumi << " DoDigits:" << mDoDigits << " NTF:" << mNTFToIntegrate << " Lumi inputs:" << lumiinp1 << ":" << inp1 << " " << lumiinp2 << ":" << inp2 << " Max errors:" << maxerrors << " Max input size:" << mMaxInputSize << " MaxInputSizeFatal:" << mMaxInputSizeFatal; + LOG(info) << "CTP reco init done. Inputs decoding here:" << mDecodeinputs << " DoLumi:" << mDoLumi << " DoDigits:" << mDoDigits << " NTF:" << mNTFToIntegrate << " Lumi inputs:" << lumiinp1 << ":" << inp1 << " " << lumiinp2 << ":" << inp2 << " Max errors:" << maxerrors << " Max input size:" << mMaxInputSize << " MaxInputSizeFatal:" << mMaxInputSizeFatal << " CheckConsistency:" << mCheckConsistency; // mOutputLumiInfo.printInputs(); } void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) @@ -69,22 +71,22 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) o0 = TFOrbits[i]; } std::cout << std::endl; - LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift(); - LOG(info) << "Number of missing TF:" << nmiss << std::endl; - if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) { - LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << mDecoder.getErrorTCR() << std::endl; + LOG(info) << "Number of non continous TF:" << nmiss << std::endl; + LOG(info) << "Lost in shiftInputs:" << mLostDueToShiftInps; + LOG(info) << "Lost in addDigit Inputs:" << mIRRejected << " Classes:" << mTCRRejected; + if (mErrorIR || mErrorTCR) { + LOG(error) << "# of 
IR errors:" << mErrorIR << " TCR errors:" << mErrorTCR << std::endl; } - std::array clsA = mDecoder.getClassCountersA(); - std::array clsB = mDecoder.getClassCountersB(); - std::array clsEA = mDecoder.getClassErrorsA(); - std::array clsEB = mDecoder.getClassErrorsB(); - - for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { - bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0; - if (clsEA[i]) { - LOG(error) << " Class without inputs:"; + if (mCheckConsistency) { + LOG(info) << "Lost due to the shift Consistency Checker:" << mDecoder.getLostDueToShiftCls(); + auto ctpcfg = mDecoder.getCTPConfig(); + for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { + std::string name = ctpcfg.getClassNameFromIndex(i); + if (mClsEA[i]) { + LOG(error) << " Class without inputs:"; + } + LOG(important) << "CLASS:" << name << ":" << i << " Cls=>Inp:" << mClsA[i] << " Inp=>Cls:" << mClsB[i] << " ErrorsCls=>Inps:" << mClsEA[i] << " MissingInps=>Cls:" << mClsEB[i]; } - LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i]; } } void RawDecoderSpec::run(framework::ProcessingContext& ctx) @@ -161,6 +163,21 @@ void RawDecoderSpec::run(framework::ProcessingContext& ctx) if (mDoDigits) { LOG(info) << "[CTPRawToDigitConverter - run] Writing " << mOutputDigits.size() << " digits. 
IR rejected:" << mDecoder.getIRRejected() << " TCR rejected:" << mDecoder.getTCRRejected(); ctx.outputs().snapshot(o2::framework::Output{"CTP", "DIGITS", 0}, mOutputDigits); + mLostDueToShiftInps += mDecoder.getLostDueToShiftInp(); + mErrorIR += mDecoder.getErrorIR(); + mErrorTCR += mDecoder.getErrorTCR(); + mIRRejected += mDecoder.getIRRejected(); + mTCRRejected += mDecoder.getTCRRejected(); + auto clsEA = mDecoder.getClassErrorsA(); + auto clsEB = mDecoder.getClassErrorsB(); + auto cntCA = mDecoder.getClassCountersA(); + auto cntCB = mDecoder.getClassCountersB(); + for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { + mClsEA[i] += clsEA[i]; + mClsEB[i] += clsEB[i]; + mClsA[i] += cntCA[i]; + mClsB[i] += cntCB[i]; + } } if (mDoLumi) { uint32_t tfCountsT = 0; @@ -236,7 +253,8 @@ o2::framework::DataProcessorSpec o2::ctp::reco_workflow::getRawDecoderSpec(bool {"lumi-inp2", o2::framework::VariantType::String, "VBA", {"The second input used for online lumi. Name in capital."}}, {"use-verbose-mode", o2::framework::VariantType::Bool, false, {"Verbose logging"}}, {"max-input-size", o2::framework::VariantType::Int, 0, {"Do not process input if bigger than max size, 0 - do not check"}}, - {"max-input-size-fatal", o2::framework::VariantType::Bool, false, {"If true issue fatal error otherwise error on;y"}}, + {"max-input-size-fatal", o2::framework::VariantType::Bool, false, {"If true issue fatal error otherwise error only"}}, + {"check-consistency", o2::framework::VariantType::Bool, false, {"If true checks digits consistency using ctp config"}}, {"ctpinputs-decoding", o2::framework::VariantType::Bool, false, {"Inputs alignment: true - raw decoder - has to be compatible with CTF decoder: allowed options: 10,01,00"}}}}; } void RawDecoderSpec::updateTimeDependentParams(framework::ProcessingContext& pc) From 769ba3364776be99859990b6523814297d207aec Mon Sep 17 00:00:00 2001 From: swenzel Date: Mon, 19 May 2025 17:58:22 +0200 Subject: [PATCH 0382/1764] Fix units for 
GeneratorFromEventPool --- Generators/src/GeneratorFromFile.cxx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Generators/src/GeneratorFromFile.cxx b/Generators/src/GeneratorFromFile.cxx index e37a3886c24e1..66f7e03a4cf15 100644 --- a/Generators/src/GeneratorFromFile.cxx +++ b/Generators/src/GeneratorFromFile.cxx @@ -385,6 +385,11 @@ GeneratorFromEventPool::GeneratorFromEventPool(EventPoolGenConfig const& pars) : bool GeneratorFromEventPool::Init() { + // this simply passes tracks trough. Leave units intact. + setTimeUnit(1.); + setPositionUnit(1.); + setEnergyUnit(1.); + // initialize the event pool if (mConfig.rngseed > 0) { mRandomEngine.seed(mConfig.rngseed); @@ -588,4 +593,4 @@ std::vector GeneratorFromEventPool::setupFileUniverse(std::string c ClassImp(o2::eventgen::GeneratorFromEventPool); ClassImp(o2::eventgen::GeneratorFromFile); -ClassImp(o2::eventgen::GeneratorFromO2Kine); \ No newline at end of file +ClassImp(o2::eventgen::GeneratorFromO2Kine); From 0a9fbfa7809b174632895e1e804ab0ae42c0e2f3 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 20 May 2025 10:39:14 +0200 Subject: [PATCH 0383/1764] DPL Analysis: fix ineffective function for Builds<> (#14297) --- Framework/Core/include/Framework/AnalysisManagers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 1d894b2b67948..dfec2256875c9 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -294,7 +294,7 @@ bool prepareOutput(ProcessingContext& context, T& spawns) } template -bool prepareOuput(ProcessingContext& context, T& builds) +bool prepareOutput(ProcessingContext& context, T& builds) { using metadata = o2::aod::MetadataTrait>::metadata; return builds.template build(builds.pack(), extractOriginals(context)); From 128a030847822127c42ba6e92f606d2f87b55409 Mon Sep 17 00:00:00 2001 
From: Sergio Garcia <47090312+singiamtel@users.noreply.github.com> Date: Tue, 20 May 2025 11:19:39 +0200 Subject: [PATCH 0384/1764] Github Actions: Setup dependabot (#14292) Related: https://github.com/AliceO2Group/O2Physics/pull/10660 --- .github/dependabot.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000000..30ad6d8f005b3 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +--- +# Dependabot configuration +# Reference: https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "github-actions" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" From dbf8b73a42164b608f91ed2095f8550788672dea Mon Sep 17 00:00:00 2001 From: Daniel Battistini <60930860+danielbattistini@users.noreply.github.com> Date: Tue, 20 May 2025 18:06:53 +0200 Subject: [PATCH 0385/1764] Fix the thickness of logical volumes for kTurboStaves and kStaggered configurations of the tracker (#14268) --- .../TRK/simulation/include/TRKSimulation/TRKLayer.h | 3 +++ Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h index ef355ec36ce2f..ba894f6d7a92b 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h @@ -45,6 +45,9 @@ class TRKLayer void createLayer(TGeoVolume* motherVolume); private: + // TGeo objects outside logical volumes can cause errors. 
Only used in case of kStaggered and kTurboStaves layouts + static constexpr float mLogicalVolumeThickness = 1; + int mLayerNumber; std::string mLayerName; float mInnerRadius; diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx index e6b00f6e96425..a95418afbba25 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx @@ -120,7 +120,7 @@ TGeoVolume* TRKLayer::createStave(std::string type, double width) staveVol->AddNode(chipVol, 1, nullptr); } else if (type == "staggered") { double width = mModuleWidth * 2; // Each stave has two modules (based on the LOI design) - stave = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); + stave = new TGeoBBox(width / 2, mLogicalVolumeThickness / 2, mZ / 2); TGeoVolume* chipVolLeft = createChip("flat", mModuleWidth); TGeoVolume* chipVolRight = createChip("flat", mModuleWidth); staveVol = new TGeoVolume(staveName.c_str(), stave, medAir); @@ -152,7 +152,11 @@ void TRKLayer::createLayer(TGeoVolume* motherVolume) chipName = o2::trk::GeometryTGeo::getTRKChipPattern() + std::to_string(mLayerNumber), sensName = Form("%s%d", GeometryTGeo::getTRKSensorPattern(), mLayerNumber); - TGeoTube* layer = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); + double layerThickness = mChipThickness; + if (mLayout != eLayout::kCylinder) { + layerThickness = mLogicalVolumeThickness; + } + TGeoTube* layer = new TGeoTube(mInnerRadius - 0.333 * layerThickness, mInnerRadius + 0.667 * layerThickness, mZ / 2); TGeoVolume* layerVol = new TGeoVolume(mLayerName.c_str(), layer, medAir); layerVol->SetLineColor(kYellow); From 930d83707083b9916ee60c18fa1680e75f8ff5f8 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 20 May 2025 19:27:52 +0200 Subject: [PATCH 0386/1764] DPL Analysis: move spawner caches outside of the spawner function (#14281) --- .../Core/include/Framework/AnalysisHelpers.h 
| 12 ++++- .../Core/include/Framework/AnalysisManagers.h | 14 ++++- .../Core/include/Framework/TableBuilder.h | 53 +++++-------------- Framework/Core/src/AODReaderHelpers.cxx | 8 ++- Framework/Core/test/test_TableSpawner.cxx | 7 ++- 5 files changed, 45 insertions(+), 49 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 55d2490dff1bc..95be6c7e407b3 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -297,6 +297,7 @@ struct Spawns : decltype(transformBase()) { using extension_t = typename metadata::extension_table_t; using base_table_t = typename metadata::base_table_t; using expression_pack_t = typename metadata::expression_pack_t; + static constexpr size_t N = framework::pack_size(expression_pack_t{}); constexpr auto pack() { @@ -318,7 +319,13 @@ struct Spawns : decltype(transformBase()) { } std::shared_ptr table = nullptr; std::shared_ptr extension = nullptr; + std::array projectors = [](framework::pack) -> std::array + { + return {{std::move(C::Projector())...}}; + } + (expression_pack_t{}); std::shared_ptr projector = nullptr; + std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); }; template @@ -365,6 +372,7 @@ struct Defines : decltype(transformBase()) { std::array projectors; std::shared_ptr projector = nullptr; + std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); }; template @@ -828,8 +836,10 @@ template auto Extend(T const& table) { using output_t = Join, o2::aod::Hash<"JOIN/0"_h>, o2::aod::Hash<"JOIN"_h>, Cs...>>; + static std::array projectors{{std::move(Cs::Projector())...}}; static std::shared_ptr projector = nullptr; - return output_t{{o2::framework::spawner(framework::pack{}, {table.asArrowTable()}, "dynamicExtension", projector), table.asArrowTable()}, 0}; + static auto schema = 
std::make_shared(o2::soa::createFieldsFromColumns(framework::pack{})); + return output_t{{o2::framework::spawner(framework::pack{}, {table.asArrowTable()}, "dynamicExtension", projectors.data(), projector, schema), table.asArrowTable()}, 0}; } /// Template function to attach dynamic columns on-the-fly (e.g. inside diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index dfec2256875c9..2a052c0b07218 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -287,8 +287,13 @@ bool prepareOutput(ProcessingContext& context, T& spawns) using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } + using D = o2::aod::Hash; - spawns.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), spawns.projector)); + spawns.extension = std::make_shared(o2::framework::spawner(originalTable, + o2::aod::label(), + spawns.projectors.data(), + spawns.projector, + spawns.schema)); spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } @@ -309,8 +314,13 @@ bool prepareOutput(ProcessingContext& context, T& defines) using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } + using D = o2::aod::Hash; - defines.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), defines.projectors.data(), defines.projector)); + defines.extension = std::make_shared(o2::framework::spawner(originalTable, + o2::aod::label(), + defines.projectors.data(), + defines.projector, + defines.schema)); defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } diff --git 
a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index e2d12789ef922..f941bf29bd8c8 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -768,80 +768,51 @@ std::shared_ptr spawnerHelper(std::shared_ptr const& /// Expression-based column generator to materialize columns template requires(soa::has_configurable_extension::metadata>) -auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) +auto spawner(std::shared_ptr const& fullTable, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); + return spawnerHelper(fullTable, schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template requires(soa::has_configurable_extension::metadata>) -auto spawner(std::shared_ptr const& fullTable, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) +auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { - using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, placeholders_pack_t{}); - } - static 
auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); + return spawner(fullTable, name, projectors, projector, schema); } template requires(soa::has_extension::metadata> && !soa::has_configurable_extension::metadata>) -auto spawner(std::vector>&& tables, const char* name, std::shared_ptr& projector) +auto spawner(std::shared_ptr const& fullTable, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); - - auto projectors = [](framework::pack) -> std::array - { - return {{std::move(C::Projector())...}}; - } - (expression_pack_t{}); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); + return spawnerHelper(fullTable, schema, framework::pack_size(expression_pack_t{}), projectors, name, projector); } template requires(soa::has_extension::metadata> && !soa::has_configurable_extension::metadata>) -auto spawner(std::shared_ptr const& fullTable, const char* name, std::shared_ptr& projector) +auto spawner(std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { - using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - 
if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, expression_pack_t{}); - } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); - auto projectors = [](framework::pack) -> std::array - { - return {{std::move(C::Projector())...}}; - } - (expression_pack_t{}); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); + return spawner(fullTable, name, projectors, projector, schema); } template -auto spawner(framework::pack columns, std::vector>&& tables, const char* name, std::shared_ptr& projector) +auto spawner(framework::pack, std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { std::array labels{"original"}; auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(columns)); - std::array projectors{{std::move(C::Projector())...}}; - return spawnerHelper(fullTable, new_schema, sizeof...(C), projectors.data(), name, projector); + return spawnerHelper(fullTable, schema, sizeof...(C), projectors, name, projector); } template diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index c413f2520919d..4dbd2877476be 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -158,7 +158,13 @@ auto make_spawn(InputSpec const& input, ProcessingContext& pc) using metadata_t = o2::aod::MetadataTrait::metadata; constexpr auto sources = metadata_t::sources; static std::shared_ptr projector = nullptr; - return o2::framework::spawner(extractOriginals(pc), 
input.binding.c_str(), projector); + static std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(typename metadata_t::expression_pack_t{})); + static auto projectors = [](framework::pack) -> std::array + { + return {{std::move(C::Projector())...}}; + } + (typename metadata_t::expression_pack_t{}); + return o2::framework::spawner(extractOriginals(pc), input.binding.c_str(), projectors.data(), projector, schema); } } // namespace diff --git a/Framework/Core/test/test_TableSpawner.cxx b/Framework/Core/test/test_TableSpawner.cxx index 2291ba5f4f787..e200adf37ccb4 100644 --- a/Framework/Core/test/test_TableSpawner.cxx +++ b/Framework/Core/test/test_TableSpawner.cxx @@ -50,10 +50,9 @@ TEST_CASE("TestTableSpawner") auto t1 = b1.finalize(); Points st1{t1}; - std::shared_ptr projector = nullptr; - auto expoints_a = o2::soa::Extend(st1); - auto extension = ExPointsExtension{o2::framework::spawner>(t1, o2::aod::Hash<"ExPoints"_h>::str, projector)}; + Spawns s; + auto extension = ExPointsExtension{o2::framework::spawner>(t1, o2::aod::Hash<"ExPoints"_h>::str, s.projectors.data(), s.projector, s.schema)}; auto expoints = ExPoints{{t1, extension.asArrowTable()}, 0}; REQUIRE(expoints_a.size() == 9); @@ -81,7 +80,7 @@ TEST_CASE("TestTableSpawner") Defines excpts; excpts.projectors[0] = test::x * test::x + test::y * test::y + test::z * test::z; - auto extension_2 = ExcPointsCfgExtension{o2::framework::spawner>({t1}, o2::aod::Hash<"ExcPoints"_h>::str, excpts.projectors.data(), excpts.projector)}; + auto extension_2 = ExcPointsCfgExtension{o2::framework::spawner>({t1}, o2::aod::Hash<"ExcPoints"_h>::str, excpts.projectors.data(), excpts.projector, excpts.schema)}; auto excpoints = ExcPoints{{t1, extension_2.asArrowTable()}, 0}; rex = extension.begin(); From 23781677b66c802d8f8ea8e2dbb390b425d91bec Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 20 May 2025 23:59:09 +0200 Subject: [PATCH 0387/1764] GPU TPC NN Clusterizer: Fix compilation without ONNX --- 
GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 6c4e60a6025e1..64e6f5a31aaa7 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -1198,6 +1198,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { +#ifdef GPUCA_HAS_ONNX if (GetProcessingSettings().nn.applyNNclusterizer) { LOG(info) << "(ORT) Environment releasing..."; GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; @@ -1205,6 +1206,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) nnApplication.mModelReg1.release(true); nnApplication.mModelReg2.release(true); } +#endif if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); } From 546f79341f0407ed993b08046b97ef3f7a34e8cd Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 21 May 2025 16:06:32 +0200 Subject: [PATCH 0388/1764] DPL: fix error reporting (#14306) Any oldest possible timeframe message was accounted as error. Maybe we should simply drop the metric... 
--- Framework/Core/src/DataProcessingDevice.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index ae25d8d3a915c..c303af4858234 100644 --- a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -2107,7 +2107,7 @@ void DataProcessingDevice::handleData(ServiceRegistryRef ref, InputChannelInfo& LOGP(debug, "Got DomainInfoHeader, new oldestPossibleTimeslice {} on channel {}", oldestPossibleTimeslice, info.id.value); parts.At(headerIndex).reset(nullptr); parts.At(payloadIndex).reset(nullptr); - } + } break; case InputType::Invalid: { reportError("Invalid part found."); } break; From 981cd40881706e1cf56a2e99d2778c683835153f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 20 May 2025 16:07:57 +0200 Subject: [PATCH 0389/1764] GPU: Add possibility to dump raw data in case of error --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 + GPU/GPUTracking/Base/GPUReconstruction.h | 11 + .../Base/GPUReconstructionDebug.cxx | 188 ++++++++++++++++++ .../Base/GPUReconstructionLibrary.cxx | 2 +- GPU/GPUTracking/CMakeLists.txt | 1 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 8 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 28 ++- GPU/GPUTracking/Global/GPUChainTracking.h | 7 +- .../GPUChainTrackingDebugAndProfiling.cxx | 24 ++- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 81 ++++---- GPU/GPUTracking/Global/GPUErrorCodes.h | 1 + GPU/GPUTracking/Global/GPUErrors.cxx | 8 +- GPU/GPUTracking/Global/GPUErrors.h | 2 +- 13 files changed, 313 insertions(+), 50 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUReconstructionDebug.cxx diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index c76bf11c3e25d..a4e5d5e1189f5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -193,6 +193,7 @@ int32_t GPUReconstruction::Init() } 
mSlaves[i]->ClearAllocatedMemory(); } + debugInit(); return 0; } @@ -469,6 +470,7 @@ int32_t GPUReconstruction::Exit() if (mInitialized) { ExitDevice(); } + debugExit(); mInitialized = false; return 0; } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index d5c0b8e828087..e0c866fd9421b 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -239,6 +240,9 @@ class GPUReconstruction virtual void PrintKernelOccupancies() {} double GetStatKernelTime() { return mStatKernelTime; } double GetStatWallTime() { return mStatWallTime; } + void setDebugDumpCallback(std::function&& callback = std::function(nullptr)); + bool triggerDebugDump(); + std::string getDebugFolder(const std::string& prefix = ""); // empty string = no debug // Threading std::shared_ptr mThreading; @@ -407,6 +411,13 @@ class GPUReconstruction }; static std::shared_ptr sLibCUDA, sLibHIP, sLibOCL; + // Debugging + struct debugInternal; + static std::unique_ptr mDebugData; + bool mDebugEnabled = false; + void debugInit(); + void debugExit(); + static GPUReconstruction* GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg); }; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx b/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx new file mode 100644 index 0000000000000..c1c31eedde1b2 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx @@ -0,0 +1,188 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionDebug.cxx +/// \author David Rohr + +#include "GPUReconstruction.h" +#include "GPULogging.h" +#include "GPUSettings.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace o2::gpu; + +struct GPUReconstruction::debugInternal { + std::function signalCallback; + std::function debugCallback = nullptr; + std::function reinstallCallback = nullptr; + std::unordered_map oldActions; + size_t debugCount = 0; + static void globalCallback(int32_t signal, siginfo_t* info, void* ucontext) + { + GPUReconstruction::mDebugData->signalCallback(signal, info, ucontext); + } +}; + +std::unique_ptr GPUReconstruction::mDebugData; + +void GPUReconstruction::debugInit() +{ + if (GetProcessingSettings().debugOnFailure) { + static std::mutex initMutex; + { + std::lock_guard guard(initMutex); + if (mDebugData) { + GPUFatal("Error handlers for debug dumps already set, cannot set them again"); + } + mDebugData = std::make_unique(); + } + mDebugEnabled = true; + if ((GetProcessingSettings().debugOnFailure & 1) || (GetProcessingSettings().debugOnFailure & 2)) { + struct sigaction sa, oldsa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = GPUReconstruction::debugInternal::globalCallback; + sa.sa_flags = SA_SIGINFO; + uint32_t mask = GetProcessingSettings().debugOnFailureSignalMask == (uint32_t)-1 ? 
((1 << SIGINT) | (1 << SIGABRT) | (1 << SIGBUS) | (1 << SIGTERM) | (1 << SIGSEGV)) : GetProcessingSettings().debugOnFailureSignalMask; + if (mask) { + for (uint32_t i = 0; i < sizeof(mask) * 8; i++) { + if (mask & (1 << i)) { + if (sigaction(i, &sa, &oldsa)) { + GPUFatal("Error installing signal handler for error dump on signal %d", i); + } + mDebugData->oldActions.emplace(i, oldsa); + } + } + } + + mDebugData->signalCallback = [this, &oldActions = mDebugData->oldActions, myAction = std::move(sa)](int32_t signal, siginfo_t* info, void* ucontext) { + static std::mutex callbackMutex; + std::lock_guard guard(callbackMutex); + if (mDebugData->debugCallback) { + GPUInfo("Running debug callback for signal %d", signal); + mDebugData->debugCallback(); + mDebugData->debugCount++; + } + mDebugData->debugCallback = nullptr; + if (!GetProcessingSettings().debugOnFailureNoForwardSignal) { + sigaction(signal, &oldActions[signal], nullptr); + raise(signal); + mDebugData->reinstallCallback = [signal, myAction]() { sigaction(signal, &myAction, nullptr); }; + } + }; + } + } +} + +void GPUReconstruction::debugExit() +{ + if (!mDebugEnabled) { + return; + } + if (mDebugData) { + for (auto& it : mDebugData->oldActions) { + if (sigaction(it.first, &it.second, nullptr)) { + GPUFatal("Error restoring signal handler for signal %d", it.first); + } + } + } + mDebugEnabled = false; +} + +void GPUReconstruction::setDebugDumpCallback(std::function&& callback) +{ + if (mMaster) { + if (mDebugData->reinstallCallback) { + mDebugData->reinstallCallback(); + mDebugData->reinstallCallback = nullptr; + } + mMaster->setDebugDumpCallback(std::move(callback)); + } else if (mDebugEnabled && mDebugData) { + mDebugData->debugCallback = callback; + } +} + +std::string GPUReconstruction::getDebugFolder(const std::string& prefix) +{ + const std::filesystem::path target_dir = GetProcessingSettings().debugOnFailureDirectory; + + std::size_t total_size = 0; + std::size_t subfolder_count = 0; + + if 
(!std::filesystem::exists(target_dir) || !std::filesystem::is_directory(target_dir)) { + GPUError("Invalid debugOnFailureDirectory %s", GetProcessingSettings().debugOnFailureDirectory.c_str()); + return ""; + } + + for (const auto& entry : std::filesystem::directory_iterator(target_dir)) { + if (entry.is_directory()) { + subfolder_count++; + + for (const auto& subentry : std::filesystem::directory_iterator(entry.path())) { + if (subentry.is_regular_file()) { + std::error_code ec; + auto size = std::filesystem::file_size(subentry.path(), ec); + if (!ec) { + total_size += size; + } + } + } + } + } + + if ((GetProcessingSettings().debugOnFailureMaxFiles && subfolder_count >= GetProcessingSettings().debugOnFailureMaxFiles) || (GetProcessingSettings().debugOnFailureMaxSize && (total_size >> 30) >= GetProcessingSettings().debugOnFailureMaxSize)) { + GPUError("Cannot store debug dump files, target storage exceeded: %zu dumps, %zu bytes", subfolder_count, total_size); + return ""; + } + + auto currentTime = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + std::ostringstream dateTime; + dateTime << std::put_time(std::localtime(&currentTime), "%Y-%m-%d_%H-%M-%S"); + + int32_t attempt = 0; + std::string outname; + while (true) { + if (attempt++ >= 512) { + GPUError("Error creating debug dump folder"); + return ""; + } + + outname = GetProcessingSettings().debugOnFailureDirectory + "/debug_" + prefix + (prefix == "" ? 
"" : "_") + dateTime.str() + "_" + std::to_string(attempt); + std::error_code ec; + bool created = std::filesystem::create_directory(outname, ec); + if (!ec && created) { + break; + } + } + + GPUInfo("Debug dump to %s", outname.c_str()); + return outname; +} + +bool GPUReconstruction::triggerDebugDump() +{ + if (mMaster) { + return mMaster->triggerDebugDump(); + } else if (mDebugEnabled && mDebugData && mDebugData->debugCallback) { + GPUInfo("Running triggered debug callback"); + mDebugData->debugCallback(); + mDebugData->debugCount++; + mDebugData->debugCallback = nullptr; + return true; + } + return false; +} diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index 89517c612403b..64184dd724acd 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUReconstruction.cxx +/// \file GPUReconstructionLibrary.cxx /// \author David Rohr #ifdef _WIN32 diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 52848692e7516..1b108bc74190d 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -95,6 +95,7 @@ set(SRCS_NO_CINT set(SRCS_NO_H SectorTracker/GPUTPCTrackerDump.cxx Merger/GPUTPCGMMergerDump.cxx Base/GPUReconstructionLibrary.cxx + Base/GPUReconstructionDebug.cxx Global/GPUChainTrackingClusterizer.cxx Global/GPUChainTrackingTransformation.cxx Global/GPUChainTrackingTRD.cxx diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index b9be1db881816..12f40cda4c398 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -360,6 +360,14 @@ AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") AddOption(tpcFreeAllocatedMemoryAfterProcessing, bool, false, "", 0, "Clean all memory allocated by TPC when TPC processing done, only data written to external output resources will remain") +AddOption(debugOnFailure, int32_t, 0, "", 0, "Dump raw data in case an error occured, bit 1 enables all dumps, otherwise bitmask for: 2 = signal, 3 = GPUErrorCode", def(1)) +AddOption(debugOnFailureSignalMask, uint32_t, (uint32_t)-1, "", 0, "Mask of signals that trigger debug / dump") +AddOption(debugOnFailureErrorMask, uint64_t, (uint64_t)-1, "", 0, "Mask of GPUCA_ERRORS that trigger debug / dump") +AddOption(debugOnFailureNoForwardSignal, bool, false, "", 0, "Do not forward signal to original signal handler") +AddOption(debugOnFailureMaxN, uint32_t, 1, "", 0, "Max number of times to run the debug / dump") 
+AddOption(debugOnFailureMaxFiles, uint32_t, 0, "", 0, "Max number of files to have in the target folder") +AddOption(debugOnFailureMaxSize, uint32_t, 0, "", 0, "Max size of existing dumps in the target folder in GB") +AddOption(debugOnFailureDirectory, std::string, ".", "", 0, "Target folder for debug / dump") AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) AddSubConfig(GPUSettingsProcessingRTCtechnical, rtctech) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index c1c3e368ce90c..db84050772312 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -705,10 +705,14 @@ int32_t GPUChainTracking::RunChain() } mRec->getGeneralStepTimer(GeneralStep::Prepare).Stop(); - PrepareDebugOutput(); + PrepareKernelDebugOutput(); SynchronizeStream(0); // Synchronize all init copies that might be ongoing + if (GetProcessingSettings().debugOnFailure) { + mRec->setDebugDumpCallback([this]() { DoDebugRawDump(); }); + } + if (mIOPtrs.tpcCompressedClusters) { if (runRecoStep(RecoStep::TPCDecompression, &GPUChainTracking::RunTPCDecompression)) { return 1; @@ -775,7 +779,7 @@ int32_t GPUChainTracking::RunChain() } int32_t retVal = 0; - if (CheckErrorCodes(false, false, mRec->getErrorCodeOutput())) { + if (CheckErrorCodes(false, false, mRec->getErrorCodeOutput())) { // TODO: Eventually, we should use GPUReconstruction::CheckErrorCodes retVal = 3; if (!GetProcessingSettings().ignoreNonFatalGPUErrors) { return retVal; @@ -815,7 +819,7 @@ int32_t GPUChainTracking::RunChainFinalize() PrintOutputStat(); } - PrintDebugOutput(); + PrintKernelDebugOutput(); // PrintMemoryRelations(); @@ -884,6 +888,7 @@ int32_t GPUChainTracking::FinalizePipelinedProcessing() int32_t GPUChainTracking::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, std::vector>* fillErrors) { int32_t retVal = 0; + bool hasDebugError = false; for 
(int32_t i = 0; i < 1 + (!cpuOnly && mRec->IsGPU()); i++) { if (i) { const auto& threadContext = GetThreadContext(); @@ -925,9 +930,26 @@ int32_t GPUChainTracking::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, st fillErrors->emplace_back(std::array{pErrors[4 * j], pErrors[4 * j + 1], pErrors[4 * j + 2], pErrors[4 * j + 3]}); } } + if ((GetProcessingSettings().debugOnFailure & 1) || (GetProcessingSettings().debugOnFailure & 4)) { + if (GetProcessingSettings().debugOnFailureErrorMask == (uint64_t)-1) { + hasDebugError = true; + } else { + uint32_t nErrors = processors()->errorCodes.getNErrors(); + const uint32_t* pErrors = processors()->errorCodes.getErrorPtr(); + for (uint32_t j = 0; j < nErrors; j++) { + if (GetProcessingSettings().debugOnFailureErrorMask & (1 << pErrors[4 * j])) { + hasDebugError = true; + break; + } + } + } + } } } ClearErrorCodes(cpuOnly); + if (hasDebugError) { + mRec->triggerDebugDump(); + } return retVal; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 2a2996895dbcf..7d4adcd70af7f 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -134,7 +134,7 @@ class GPUChainTracking : public GPUChain void ClearIOPointers(); void AllocateIOMemory(); using GPUChain::DumpData; - void DumpData(const char* filename); + void DumpData(const char* filename, const GPUTrackingInOutPointers* ioPtrs = nullptr); using GPUChain::ReadData; int32_t ReadData(const char* filename); void DumpSettings(const char* dir = "") override; @@ -231,11 +231,12 @@ class GPUChainTracking : public GPUChain int32_t DoProfile(); void PrintMemoryRelations(); void PrintMemoryStatistics() override; - void PrepareDebugOutput(); - void PrintDebugOutput(); + void PrepareKernelDebugOutput(); + void PrintKernelDebugOutput(); void PrintOutputStat(); static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters); static void 
DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls); + void DoDebugRawDump(); bool ValidateSteps(); bool ValidateSettings(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index f72943e6bcd5a..e9721ec9d12bf 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -185,7 +185,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); } -void GPUChainTracking::PrepareDebugOutput() +void GPUChainTracking::PrepareKernelDebugOutput() { #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT const auto& threadContext = GetThreadContext(); @@ -198,7 +198,7 @@ void GPUChainTracking::PrepareDebugOutput() #endif } -void GPUChainTracking::PrintDebugOutput() +void GPUChainTracking::PrintKernelDebugOutput() { #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT const auto& threadContext = GetThreadContext(); @@ -390,3 +390,23 @@ void GPUChainTracking::DebugSortCompressedClusters(o2::tpc::CompressedClustersFl sortMultiple(c.nAttachedClustersReduced, getReducedOffset, getN1, c.rowDiffA, c.sliceLegDiffA, c.padResA, c.timeResA); sortMultiple(c.nTracks, getIndex, get1, c.qPtA, c.rowA, c.sliceA, c.timeA, c.padA, c.nTrackClusters); // NOTE: This must be last, since nTrackClusters is used for handling the arrays above! 
} + +void GPUChainTracking::DoDebugRawDump() +{ + std::string dirName = mRec->getDebugFolder("tpc_raw"); + if (dirName == "") { + return; + } + GPUTrackingInOutPointers ioPtrs; + if (mIOPtrs.tpcZS) { + ioPtrs.tpcZS = mIOPtrs.tpcZS; + } else if (mIOPtrs.tpcPackedDigits) { + ioPtrs.tpcPackedDigits = mIOPtrs.tpcPackedDigits; + } else if (mIOPtrs.clustersNative) { + ioPtrs.clustersNative = mIOPtrs.clustersNative; + } + + GPUInfo("Doing debug raw dump"); + mRec->DumpSettings((dirName + "/").c_str()); + DumpData((dirName + "/event.0.dump").c_str(), &ioPtrs); +} diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 035e257ca7952..5a141cd08eb65 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -63,33 +63,36 @@ GPUChainTracking::InOutMemory::~InOutMemory() = default; GPUChainTracking::InOutMemory::InOutMemory(GPUChainTracking::InOutMemory&&) = default; GPUChainTracking::InOutMemory& GPUChainTracking::InOutMemory::operator=(GPUChainTracking::InOutMemory&&) = default; // NOLINT: False positive in clang-tidy -void GPUChainTracking::DumpData(const char* filename) +void GPUChainTracking::DumpData(const char* filename, const GPUTrackingInOutPointers* ioPtrs) { FILE* fp = fopen(filename, "w+b"); if (fp == nullptr) { return; } + if (ioPtrs == nullptr) { + ioPtrs = &mIOPtrs; + } fwrite(DUMP_HEADER, 1, DUMP_HEADER_SIZE, fp); fwrite(&GPUReconstruction::geometryType, sizeof(GPUReconstruction::geometryType), 1, fp); - DumpData(fp, mIOPtrs.clusterData, mIOPtrs.nClusterData, InOutPointerType::CLUSTER_DATA); - DumpData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, InOutPointerType::RAW_CLUSTERS); - if (mIOPtrs.clustersNative) { - if (DumpData(fp, &mIOPtrs.clustersNative->clustersLinear, &mIOPtrs.clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { - fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), 
NSECTORS * GPUCA_ROW_COUNT, fp); - if (mIOPtrs.clustersNative->clustersMCTruth) { - const auto& buffer = mIOPtrs.clustersNative->clustersMCTruth->getBuffer(); + DumpData(fp, ioPtrs->clusterData, ioPtrs->nClusterData, InOutPointerType::CLUSTER_DATA); + DumpData(fp, ioPtrs->rawClusters, ioPtrs->nRawClusters, InOutPointerType::RAW_CLUSTERS); + if (ioPtrs->clustersNative) { + if (DumpData(fp, &ioPtrs->clustersNative->clustersLinear, &ioPtrs->clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { + fwrite(&ioPtrs->clustersNative->nClusters[0][0], sizeof(ioPtrs->clustersNative->nClusters[0][0]), NSECTORS * GPUCA_ROW_COUNT, fp); + if (ioPtrs->clustersNative->clustersMCTruth) { + const auto& buffer = ioPtrs->clustersNative->clustersMCTruth->getBuffer(); std::pair tmp = {buffer.data(), buffer.size()}; DumpData(fp, &tmp.first, &tmp.second, InOutPointerType::CLUSTER_NATIVE_MC); } } } - if (mIOPtrs.tpcPackedDigits) { - if (DumpData(fp, mIOPtrs.tpcPackedDigits->tpcDigits, mIOPtrs.tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT) && mIOPtrs.tpcPackedDigits->tpcDigitsMC) { + if (ioPtrs->tpcPackedDigits) { + if (DumpData(fp, ioPtrs->tpcPackedDigits->tpcDigits, ioPtrs->tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT) && ioPtrs->tpcPackedDigits->tpcDigitsMC) { const char* ptrs[NSECTORS]; size_t sizes[NSECTORS]; for (uint32_t i = 0; i < NSECTORS; i++) { - if (mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]) { - const auto& buffer = mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]->getBuffer(); + if (ioPtrs->tpcPackedDigits->tpcDigitsMC->v[i]) { + const auto& buffer = ioPtrs->tpcPackedDigits->tpcDigitsMC->v[i]->getBuffer(); ptrs[i] = buffer.data(); sizes[i] = buffer.size(); } else { @@ -100,12 +103,12 @@ void GPUChainTracking::DumpData(const char* filename) DumpData(fp, ptrs, sizes, InOutPointerType::TPC_DIGIT_MC); } } - if (mIOPtrs.tpcZS) { + if (ioPtrs->tpcZS) { size_t total = 0; for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < 
GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { - total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < ioPtrs->tpcZS->sector[i].count[j]; k++) { + total += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -115,10 +118,10 @@ void GPUChainTracking::DumpData(const char* filename) total = 0; for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { - memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], mIOPtrs.tpcZS->sector[i].zsPtr[j][k], mIOPtrs.tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); - counts.count[i][j] += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; - total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < ioPtrs->tpcZS->sector[i].count[j]; k++) { + memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], ioPtrs->tpcZS->sector[i].zsPtr[j][k], ioPtrs->tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); + counts.count[i][j] += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; + total += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -127,33 +130,33 @@ void GPUChainTracking::DumpData(const char* filename) fwrite(&counts, sizeof(counts), 1, fp); } } - if (mIOPtrs.tpcCompressedClusters) { - if (mIOPtrs.tpcCompressedClusters->ptrForward) { + if (ioPtrs->tpcCompressedClusters) { + if (ioPtrs->tpcCompressedClusters->ptrForward) { throw std::runtime_error("Cannot dump non-flat compressed clusters"); } - char* ptr = (char*)mIOPtrs.tpcCompressedClusters; - size_t size = mIOPtrs.tpcCompressedClusters->totalDataSize; + char* ptr = (char*)ioPtrs->tpcCompressedClusters; + size_t size = ioPtrs->tpcCompressedClusters->totalDataSize; DumpData(fp, &ptr, &size, InOutPointerType::TPC_COMPRESSED_CL); } - if (mIOPtrs.settingsTF) { + if (ioPtrs->settingsTF) { uint32_t n = 1; - DumpData(fp, &mIOPtrs.settingsTF, &n, InOutPointerType::TF_SETTINGS); + DumpData(fp, 
&ioPtrs->settingsTF, &n, InOutPointerType::TF_SETTINGS); } - DumpData(fp, mIOPtrs.sectorTracks, mIOPtrs.nSectorTracks, InOutPointerType::SECTOR_OUT_TRACK); - DumpData(fp, mIOPtrs.sectorClusters, mIOPtrs.nSectorClusters, InOutPointerType::SECTOR_OUT_CLUSTER); - DumpData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); - DumpData(fp, &mIOPtrs.mcInfosTPC, &mIOPtrs.nMCInfosTPC, InOutPointerType::MC_INFO_TPC); - DumpData(fp, &mIOPtrs.mcInfosTPCCol, &mIOPtrs.nMCInfosTPCCol, InOutPointerType::MC_INFO_TPC); - DumpData(fp, &mIOPtrs.mergedTracks, &mIOPtrs.nMergedTracks, InOutPointerType::MERGED_TRACK); - DumpData(fp, &mIOPtrs.mergedTrackHits, &mIOPtrs.nMergedTrackHits, InOutPointerType::MERGED_TRACK_HIT); - DumpData(fp, &mIOPtrs.trdTracks, &mIOPtrs.nTRDTracks, InOutPointerType::TRD_TRACK); - DumpData(fp, &mIOPtrs.trdTracklets, &mIOPtrs.nTRDTracklets, InOutPointerType::TRD_TRACKLET); - if (mIOPtrs.trdSpacePoints) { - DumpData(fp, &mIOPtrs.trdSpacePoints, &mIOPtrs.nTRDTracklets, InOutPointerType::TRD_SPACEPOINT); + DumpData(fp, ioPtrs->sectorTracks, ioPtrs->nSectorTracks, InOutPointerType::SECTOR_OUT_TRACK); + DumpData(fp, ioPtrs->sectorClusters, ioPtrs->nSectorClusters, InOutPointerType::SECTOR_OUT_CLUSTER); + DumpData(fp, &ioPtrs->mcLabelsTPC, &ioPtrs->nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); + DumpData(fp, &ioPtrs->mcInfosTPC, &ioPtrs->nMCInfosTPC, InOutPointerType::MC_INFO_TPC); + DumpData(fp, &ioPtrs->mcInfosTPCCol, &ioPtrs->nMCInfosTPCCol, InOutPointerType::MC_INFO_TPC); + DumpData(fp, &ioPtrs->mergedTracks, &ioPtrs->nMergedTracks, InOutPointerType::MERGED_TRACK); + DumpData(fp, &ioPtrs->mergedTrackHits, &ioPtrs->nMergedTrackHits, InOutPointerType::MERGED_TRACK_HIT); + DumpData(fp, &ioPtrs->trdTracks, &ioPtrs->nTRDTracks, InOutPointerType::TRD_TRACK); + DumpData(fp, &ioPtrs->trdTracklets, &ioPtrs->nTRDTracklets, InOutPointerType::TRD_TRACKLET); + if (ioPtrs->trdSpacePoints) { + DumpData(fp, &ioPtrs->trdSpacePoints, 
&ioPtrs->nTRDTracklets, InOutPointerType::TRD_SPACEPOINT); } - DumpData(fp, &mIOPtrs.trdTriggerTimes, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); - DumpData(fp, &mIOPtrs.trdTrackletIdxFirst, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); - DumpData(fp, &mIOPtrs.trdTrigRecMask, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTriggerTimes, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTrackletIdxFirst, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTrigRecMask, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); fclose(fp); } diff --git a/GPU/GPUTracking/Global/GPUErrorCodes.h b/GPU/GPUTracking/Global/GPUErrorCodes.h index f35f5fc81a382..8fec23be00a09 100644 --- a/GPU/GPUTracking/Global/GPUErrorCodes.h +++ b/GPU/GPUTracking/Global/GPUErrorCodes.h @@ -47,5 +47,6 @@ GPUCA_ERROR_CODE(26, ERROR_TPCZS_INVALID_ROW, SectorRow) GPUCA_ERROR_CODE(27, ERROR_TPCZS_INVALID_NADC, SectorCRU, SamplesInPage, SamplesWritten) // Invalid number of ADC samples in header, existing samples were decoded GPUCA_ERROR_CODE(28, ERROR_TPCZS_INCOMPLETE_HBF, SectorCRU, PacketCount, NextPacketCount) // Part of HBF is missing, decoding incomplete GPUCA_ERROR_CODE(29, ERROR_TPCZS_INVALID_OFFSET, SectorEndpoint, Value, Expected) // Raw page is skipped since it contains invalid payload offset +GPUCA_ERROR_CODE(29, MAX_GPUCA_ERROR_NUMBER) // #define GPUCA_CHECK_TPCZS_CORRUPTION diff --git a/GPU/GPUTracking/Global/GPUErrors.cxx b/GPU/GPUTracking/Global/GPUErrors.cxx index 7f3ed1d8206d9..e9d5a74c6567a 100644 --- a/GPU/GPUTracking/Global/GPUErrors.cxx +++ b/GPU/GPUTracking/Global/GPUErrors.cxx @@ -54,12 +54,17 @@ static std::unordered_map errorNames = { #undef GPUCA_ERROR_CODE }; -void GPUErrors::printErrors(bool silent) +bool GPUErrors::printErrors(bool silent, uint64_t mask) { + bool retVal = 0; for 
(uint32_t i = 0; i < std::min(*mErrors, GPUCA_MAX_ERRORS); i++) { uint32_t errorCode = mErrors[4 * i + 1]; const auto& it = errorNames.find(errorCode); const char* errorName = it == errorNames.end() ? "INVALID ERROR CODE" : it->second; + static_assert(MAX_GPUCA_ERROR_NUMBER <= sizeof(mask) * 8); + if (mask & (1 << errorCode)) { + retVal = 1; + } if (silent && i) { GPUWarning("GPU Error Code (%u:%u) %s : %u / %u / %u", i, errorCode, errorName, mErrors[4 * i + 2], mErrors[4 * i + 3], mErrors[4 * i + 4]); } else if (silent) { @@ -75,6 +80,7 @@ void GPUErrors::printErrors(bool silent) GPUError("Additional errors occured (codes not stored)"); } } + return retVal; } uint32_t GPUErrors::getNErrors() const diff --git a/GPU/GPUTracking/Global/GPUErrors.h b/GPU/GPUTracking/Global/GPUErrors.h index cd86390bc1b01..1cbc4a019601d 100644 --- a/GPU/GPUTracking/Global/GPUErrors.h +++ b/GPU/GPUTracking/Global/GPUErrors.h @@ -33,7 +33,7 @@ class GPUErrors GPUd() bool hasError() { return *mErrors > 0; } void setMemory(GPUglobalref() uint32_t* m) { mErrors = m; } void clear(); - void printErrors(bool silent = false); + bool printErrors(bool silent = false, uint64_t mask = 0); uint32_t getNErrors() const; const uint32_t* getErrorPtr() const; static uint32_t getMaxErrors(); From bb048efab7be5df04ad93a974abef167f4c6e88a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 21 May 2025 19:41:52 +0200 Subject: [PATCH 0390/1764] DPL Analysis: add `clamp` expression node to constrain a result of an expression between two values (#14305) --- Framework/Core/include/Framework/Expressions.h | 8 ++++++++ Framework/Core/test/test_Expressions.cxx | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index 18c930700a91d..9d6c3cfb7c66e 100644 --- a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -546,6 +546,14 @@ inline Node updateParameters(Node 
const& pexp, int bins, std::vector const& p return result; } +/// clamping functional +template +inline Node clamp(Node&& expr, T low, T hi) +{ + auto copy = expr; + return ifnode(Node{copy} < LiteralNode{low}, LiteralNode{low}, ifnode(Node{copy} > LiteralNode{hi}, LiteralNode{hi}, Node{copy})); +} + /// A struct, containing the root of the expression tree struct Filter { Filter() = default; diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 6faa2fc352232..e8cf43e03e11d 100644 --- a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -290,6 +290,13 @@ TEST_CASE("TestConditionalExpressions") auto gandiva_condition2 = makeCondition(gandiva_tree2); auto gandiva_filter2 = createFilter(schema2, gandiva_condition2); REQUIRE(gandiva_tree2->ToString() == "bool greater_than((float) fSigned1Pt, (const float) 0 raw(0)) && if (bool less_than(float absf((float) fEta), (const float) 1 raw(3f800000)) && if (bool less_than((float) fPt, (const float) 1 raw(3f800000))) { bool greater_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) } else { bool less_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) }) { bool greater_than(float absf((float) fX), (const float) 1 raw(3f800000)) } else { bool greater_than(float absf((float) fY), (const float) 1 raw(3f800000)) }"); + + // clamp + Projector clp = clamp(o2::aod::track::pt, 1.0f, 10.f); + auto clpspecs = createOperations(clp); + auto schemaclp = std::make_shared(std::vector{o2::aod::track::Pt::asArrowField()}); + auto gandiva_tree_clp = createExpressionTree(clpspecs, schemaclp); + REQUIRE(gandiva_tree_clp->ToString() == "if (bool less_than((float) fPt, (const float) 1 raw(3f800000))) { (const float) 1 raw(3f800000) } else { if (bool greater_than((float) fPt, (const float) 10 raw(41200000))) { (const float) 10 raw(41200000) } else { (float) fPt } }"); } TEST_CASE("TestBinnedExpressions") From 35ca22b3bde5014b40ebe1d823c6dc88a313ddbf Mon Sep 
17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 21 May 2025 19:54:16 +0200 Subject: [PATCH 0391/1764] DPL: fix merging of pipelined devices (#14307) Sometimes we are just too smart. Multiple messages with the same signature are coalesced in the same input if they are processed at the same time. This explains why the sleep was improving behavior: it merely staggers arrival, so that the optimisation cannot happen anymore. --- .../AnalysisSupport/src/AODWriterHelpers.cxx | 254 +++++++++--------- 1 file changed, 129 insertions(+), 125 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx index 2b1b4f880d1ee..40d2189ea96d0 100644 --- a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx @@ -269,145 +269,149 @@ AlgorithmSpec AODWriterHelpers::getOutputObjHistWriter(ConfigContext const& ctx) callbacks.set(endofdatacb); return [inputObjects, objmap, tskmap](ProcessingContext& pc) mutable -> void { - auto const& ref = pc.inputs().get("x"); - if (!ref.header) { - LOG(error) << "Header not found"; - return; - } - auto datah = o2::header::get(ref.header); - if (!datah) { - LOG(error) << "No data header in stack"; - return; - } + auto mergePart = [&inputObjects, &objmap, &tskmap](DataRef const& ref) { + if (!ref.header) { + LOG(error) << "Header not found"; + return; + } + auto datah = o2::header::get(ref.header); + if (!datah) { + LOG(error) << "No data header in stack"; + return; + } - if (!ref.payload) { - LOGP(error, "Payload not found for {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + if (!ref.payload) { + LOGP(error, "Payload not found for {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - auto objh = o2::header::get(ref.header); - if (!objh) { - LOGP(error, "No output object header in stack 
of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + auto objh = o2::header::get(ref.header); + if (!objh) { + LOGP(error, "No output object header in stack of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - InputObject obj; - FairInputTBuffer tm(const_cast(ref.payload), static_cast(datah->payloadSize)); - tm.InitMap(); - obj.kind = tm.ReadClass(); - tm.SetBufferOffset(0); - tm.ResetMap(); - if (obj.kind == nullptr) { - LOGP(error, "Cannot read class info from buffer of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + InputObject obj; + FairInputTBuffer tm(const_cast(ref.payload), static_cast(datah->payloadSize)); + tm.InitMap(); + obj.kind = tm.ReadClass(); + tm.SetBufferOffset(0); + tm.ResetMap(); + if (obj.kind == nullptr) { + LOGP(error, "Cannot read class info from buffer of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - auto policy = objh->mPolicy; - auto sourceType = objh->mSourceType; - auto hash = objh->mTaskHash; + auto policy = objh->mPolicy; + auto sourceType = objh->mSourceType; + auto hash = objh->mTaskHash; - obj.obj = tm.ReadObjectAny(obj.kind); - auto* named = static_cast(obj.obj); - obj.name = named->GetName(); - auto hpos = std::find_if(tskmap.begin(), tskmap.end(), [&](auto&& x) { return x.id == hash; }); - if (hpos == tskmap.end()) { - LOG(error) << "No task found for hash " << hash; - return; - } - auto taskname = hpos->name; - auto opos = std::find_if(objmap.begin(), objmap.end(), [&](auto&& x) { return x.id == hash; }); - if (opos == objmap.end()) { - LOG(error) << "No object list found for task " << taskname << " (hash=" << hash << ")"; - return; - } - auto objects = opos->bindings; - if (std::find(objects.begin(), objects.end(), obj.name) == objects.end()) { - LOG(error) << "No object " << obj.name << " in map for 
task " << taskname; - return; - } - auto nameHash = runtime_hash(obj.name.c_str()); - InputObjectRoute key{obj.name, nameHash, taskname, hash, policy, sourceType}; - auto existing = std::find_if(inputObjects->begin(), inputObjects->end(), [&](auto&& x) { return (x.first.uniqueId == nameHash) && (x.first.taskHash == hash); }); - // If it's the first one, we just add it to the list. - if (existing == inputObjects->end()) { - obj.count = objh->mPipelineSize; - inputObjects->push_back(std::make_pair(key, obj)); - existing = inputObjects->end() - 1; - } else { - obj.count = existing->second.count; - // Otherwise, we merge it with the existing one. - auto merger = existing->second.kind->GetMerge(); - if (!merger) { - LOG(error) << "Already one unmergeable object found for " << obj.name; + obj.obj = tm.ReadObjectAny(obj.kind); + auto* named = static_cast(obj.obj); + obj.name = named->GetName(); + auto hpos = std::find_if(tskmap.begin(), tskmap.end(), [&](auto&& x) { return x.id == hash; }); + if (hpos == tskmap.end()) { + LOG(error) << "No task found for hash " << hash; return; } - TList coll; - coll.Add(static_cast(obj.obj)); - merger(existing->second.obj, &coll, nullptr); - } - // We expect as many objects as the pipeline size, for - // a given object name and task hash. - existing->second.count -= 1; - - if (existing->second.count != 0) { - return; - } - // Write the object here. 
- auto route = existing->first; - auto entry = existing->second; - auto file = ROOTfileNames.find(route.policy); - if (file == ROOTfileNames.end()) { - return; - } - auto filename = file->second; - if (f[route.policy] == nullptr) { - f[route.policy] = TFile::Open(filename.c_str(), "RECREATE"); - } - auto nextDirectory = route.directory; - if ((nextDirectory != currentDirectory) || (filename != currentFile)) { - if (!f[route.policy]->FindKey(nextDirectory.c_str())) { - f[route.policy]->mkdir(nextDirectory.c_str()); + auto taskname = hpos->name; + auto opos = std::find_if(objmap.begin(), objmap.end(), [&](auto&& x) { return x.id == hash; }); + if (opos == objmap.end()) { + LOG(error) << "No object list found for task " << taskname << " (hash=" << hash << ")"; + return; } - currentDirectory = nextDirectory; - currentFile = filename; - } + auto objects = opos->bindings; + if (std::find(objects.begin(), objects.end(), obj.name) == objects.end()) { + LOG(error) << "No object " << obj.name << " in map for task " << taskname; + return; + } + auto nameHash = runtime_hash(obj.name.c_str()); + InputObjectRoute key{obj.name, nameHash, taskname, hash, policy, sourceType}; + auto existing = std::find_if(inputObjects->begin(), inputObjects->end(), [&](auto&& x) { return (x.first.uniqueId == nameHash) && (x.first.taskHash == hash); }); + // If it's the first one, we just add it to the list. + if (existing == inputObjects->end()) { + obj.count = objh->mPipelineSize; + inputObjects->push_back(std::make_pair(key, obj)); + existing = inputObjects->end() - 1; + } else { + obj.count = existing->second.count; + // Otherwise, we merge it with the existing one. 
+ auto merger = existing->second.kind->GetMerge(); + if (!merger) { + LOG(error) << "Already one unmergeable object found for " << obj.name; + return; + } + TList coll; + coll.Add(static_cast(obj.obj)); + merger(existing->second.obj, &coll, nullptr); + } + // We expect as many objects as the pipeline size, for + // a given object name and task hash. + existing->second.count -= 1; - // translate the list-structure created by the registry into a directory structure within the file - std::function writeListToFile; - writeListToFile = [&](TList* list, TDirectory* parentDir) { - TIter next(list); - TObject* object = nullptr; - while ((object = next())) { - if (object->InheritsFrom(TList::Class())) { - writeListToFile(static_cast(object), parentDir->mkdir(object->GetName(), object->GetName(), true)); - } else { - parentDir->WriteObjectAny(object, object->Class(), object->GetName()); - auto* written = list->Remove(object); - delete written; + if (existing->second.count != 0) { + return; + } + // Write the object here. 
+ auto route = existing->first; + auto entry = existing->second; + auto file = ROOTfileNames.find(route.policy); + if (file == ROOTfileNames.end()) { + return; + } + auto filename = file->second; + if (f[route.policy] == nullptr) { + f[route.policy] = TFile::Open(filename.c_str(), "RECREATE"); + } + auto nextDirectory = route.directory; + if ((nextDirectory != currentDirectory) || (filename != currentFile)) { + if (!f[route.policy]->FindKey(nextDirectory.c_str())) { + f[route.policy]->mkdir(nextDirectory.c_str()); } + currentDirectory = nextDirectory; + currentFile = filename; } - }; - TDirectory* currentDir = f[route.policy]->GetDirectory(currentDirectory.c_str()); - if (route.sourceType == OutputObjSourceType::HistogramRegistrySource) { - auto* outputList = static_cast(entry.obj); - outputList->SetOwner(false); + // translate the list-structure created by the registry into a directory structure within the file + std::function writeListToFile; + writeListToFile = [&](TList* list, TDirectory* parentDir) { + TIter next(list); + TObject* object = nullptr; + while ((object = next())) { + if (object->InheritsFrom(TList::Class())) { + writeListToFile(static_cast(object), parentDir->mkdir(object->GetName(), object->GetName(), true)); + } else { + parentDir->WriteObjectAny(object, object->Class(), object->GetName()); + auto* written = list->Remove(object); + delete written; + } + } + }; + + TDirectory* currentDir = f[route.policy]->GetDirectory(currentDirectory.c_str()); + if (route.sourceType == OutputObjSourceType::HistogramRegistrySource) { + auto* outputList = static_cast(entry.obj); + outputList->SetOwner(false); + + // if registry should live in dedicated folder a TNamed object is appended to the list + if (outputList->Last() && outputList->Last()->IsA() == TNamed::Class()) { + delete outputList->Last(); + outputList->RemoveLast(); + currentDir = currentDir->mkdir(outputList->GetName(), outputList->GetName(), true); + } - // if registry should live in dedicated 
folder a TNamed object is appended to the list - if (outputList->Last() && outputList->Last()->IsA() == TNamed::Class()) { - delete outputList->Last(); - outputList->RemoveLast(); - currentDir = currentDir->mkdir(outputList->GetName(), outputList->GetName(), true); + writeListToFile(outputList, currentDir); + outputList->SetOwner(); + delete outputList; + entry.obj = nullptr; + } else { + currentDir->WriteObjectAny(entry.obj, entry.kind, entry.name.c_str()); + delete (TObject*)entry.obj; + entry.obj = nullptr; } - - writeListToFile(outputList, currentDir); - outputList->SetOwner(); - delete outputList; - entry.obj = nullptr; - } else { - currentDir->WriteObjectAny(entry.obj, entry.kind, entry.name.c_str()); - delete (TObject*)entry.obj; - entry.obj = nullptr; + }; + for (int pi = 0; pi < pc.inputs().getNofParts(0); ++pi) { + mergePart(pc.inputs().get("x", pi)); } }; }}; From 0386f65567a1fcda173c9ae39304bbd284677774 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 19 May 2025 10:49:44 +0200 Subject: [PATCH 0392/1764] Revert "GPU Common: Workaround for removing gpustd::array, temporary alias for O2Physics" This reverts commit a850e9eb3e6a634a1e87a70170c05ad6d8bce3af. 
--- .../ReconstructionDataFormats/TrackParametrization.h | 1 - GPU/Common/GPUCommonArray.h | 6 ------ 2 files changed, 7 deletions(-) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index 1d0a5f1a9f1fd..f240e34861eeb 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,7 +29,6 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include "GPUCommonMath.h" -#include "GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index fa86d7bb4a021..e83ca8c4a69fc 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -48,10 +48,4 @@ using array = std::array; } // namespace std #endif -namespace o2::gpu::gpustd -{ -template -using array = ::std::array; // temporary alias, to remove dependent types -} // o2::gpu::gpustd - #endif // GPUCOMMONARRAY_H From efff2f780c2b35603cdd6614f868100e94ba41e7 Mon Sep 17 00:00:00 2001 From: shahor02 Date: Wed, 21 May 2025 23:04:18 +0200 Subject: [PATCH 0393/1764] Add TPC cluster selector helper for tracking studies (#14308) --- .../study/CMakeLists.txt | 7 ++ .../GlobalTrackingStudy/TPCClusSelector.h | 92 ++++++++++++++ .../study/src/GlobalTrackingStudyLinkDef.h | 1 + .../study/src/TPCClusSelector.cxx | 117 ++++++++++++++++++ 4 files changed, 217 insertions(+) create mode 100644 Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h create mode 100644 Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx diff --git a/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt b/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt index 398e7eb215f2e..776d3946283c3 100644 --- 
a/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt +++ b/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt @@ -12,6 +12,7 @@ #add_compile_options(-O0 -g -fPIC) o2_add_library(GlobalTrackingStudy + TARGETVARNAME targetName SOURCES src/TPCTrackStudy.cxx src/TrackingStudy.cxx src/SVStudy.cxx @@ -23,6 +24,7 @@ o2_add_library(GlobalTrackingStudy src/TrackInfoExt.cxx src/TrackMCStudyConfig.cxx src/TrackMCStudyTypes.cxx + src/TPCClusSelector.cxx PUBLIC_LINK_LIBRARIES O2::GlobalTracking O2::GlobalTrackingWorkflowReaders O2::GlobalTrackingWorkflowHelpers @@ -73,3 +75,8 @@ o2_add_executable(dump-workfow COMPONENT_NAME bc-tracks SOURCES src/track-dump-workflow.cxx PUBLIC_LINK_LIBRARIES O2::GlobalTrackingStudy) + +if (OpenMP_CXX_FOUND) + target_compile_definitions(${targetName} PRIVATE WITH_OPENMP) + target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX) +endif() diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h new file mode 100644 index 0000000000000..c1765558458c2 --- /dev/null +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h @@ -0,0 +1,92 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +// helper class for TPC clusters selection + +#ifndef ALICEO2_TPCCLUSSELECTOR_H +#define ALICEO2_TPCCLUSSELECTOR_H + +#include +#include +#include + +namespace o2::tpc +{ +class ClusterNativeAccess; + +class TPCClusSelector +{ + // helper to select TPC cluster matching to certain timebin and optionally pads range + // example of usage: + /* + TPCClusSelector clSel; + o2::tpc::ClusterNativeHelper::Reader tcpClusterReader; + tcpClusterReader.init(native_clusters_file.c_str()); + o2::tpc::ClusterNativeAccess tpcClusterIdxStruct; + std::unique_ptr tpcClusterBuffer; ///< buffer for clusters in tpcClusterIdxStruct + o2::tpc::ClusterNativeHelper::ConstMCLabelContainerViewWithBuffer tpcClusterMCBuffer; ///< buffer for mc labels + + tcpClusterReader.read(iTF); + tcpClusterReader.fillIndex(tpcClusterIdxStruct, tpcClusterBuffer, tpcClusterMCBuffer); + + clSel.fill(tpcClusterIdxStruct); // Create sorted index + // to get i-th cluster in ordered timebins: + const auto& clus = tpcClusterIdxStruct.clusters[sector][row][ clSel.getIndex(sector, row, i)]; + + // to get sorted indices range of clusters in the tbmin:tbmax range + auto rng = clSel.findClustersRange(sector, row, tbmin, tbmax, tpcClusterIdxStruct); + if (rng.first>rng.second) { // nothing is found } + const auto& cln = tpcClusterIdxStruct.clusters[sector][row][clSel.getIndex(sector, row, rng.first )]; /... 
+ + // to get number of clusters in tbmin:tbmax, padmin:padmax range (and optionally get the list) + std::vector cllist; // optional list + int nfnd = clSel.findClustersEntries(sector, row, tbmin, tbmax, padmin, padmax, tpcClusterIdxStruct, &cllist); + for (int i=0;i findClustersRange(int sec, int row, float tbmin, float tbmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct); + int findClustersEntries(int sec, int row, float tbmin, float tbmax, float padmin, float padmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct, std::vector* clIDDirect = nullptr); + void fill(const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct); + + int getNThreads() const { return mNThreads; } + void setNThreads(int n); + + private: + struct Sector { + static constexpr int NRows = 152; + std::array, NRows> rows; + void clear() + { + for (auto& r : rows) + r.clear(); + } + }; + + static constexpr int NSectors = 36; + std::array mSectors{}; + int mNThreads = 1; + + ClassDefNV(TPCClusSelector, 1); +}; + +} // namespace o2::tpc + +#endif diff --git a/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h b/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h index f666132c9c1cf..f0d3e7d4d0b4e 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h +++ b/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h @@ -38,5 +38,6 @@ #pragma link C++ class std::vector < o2::trackstudy::ClResTPCCont> + ; #pragma link C++ class o2::trackstudy::TrackPairInfo + ; #pragma link C++ class std::vector < o2::trackstudy::TrackPairInfo> + ; +#pragma link C++ class o2::tpc::TPCClusSelector + ; #endif diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx new file mode 100644 index 0000000000000..e5b28fb0fd62b --- /dev/null +++ b/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx @@ -0,0 +1,117 @@ +// Copyright 
2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +// helper class for TPC clusters selection +#include "GlobalTrackingStudy/TPCClusSelector.h" +#include "DataFormatsTPC/ClusterNativeHelper.h" +#include "Framework/Logger.h" +#include +#ifdef WITH_OPENMP +#include +#endif + +using namespace o2::tpc; + +void TPCClusSelector::setNThreads(int n) +{ +#ifndef WITH_OPENMP + if (n > 1) { + LOGP(warn, "No OpenMP"); + } + n = 1; +#endif + mNThreads = n; +} + +std::pair TPCClusSelector::findClustersRange(int sec, int row, float tbmin, float tbmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct) +{ + // find sorted indices of clusters in the [tbmin:tbmax] range, if not found, return {-1,-2} + const auto& vidx = mSectors[sec].rows[row]; + const auto* clarr = tpcClusterIdxStruct.clusters[sec][row]; + // use binary search to find 1st cluster with time >= tbmin + int ncl = vidx.size(), left = 0, right = ncl; + while (left < right) { + int mid = left + (right - left) / 2; + if (clarr[vidx[mid]].getTime() < tbmin) { + left = mid + 1; + } else { + right = mid; + } + } + if (left == ncl || clarr[vidx[left]].getTime() > tbmax) { + return {-1, -2}; // all clusters have time < tbmin or no clusters in the range [tbmin:tbmax] + } + int idmin = left, idmax = left, idtst = idmin; + // extend the range towards larger times, up to tbmax + while (++idtst < ncl && clarr[vidx[idtst]].getTime() <= tbmax) { + idmax = idtst; + } + return {idmin, idmax}; +} + +int TPCClusSelector::findClustersEntries(int sec, int row, float tbmin, float
tbmax, float padmin, float padmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct, std::vector* clIDDirect) +{ + // find direct cluster indices for tbmin:tbmax / padmin:padmax range, fill clIDDirect vector if provided + const auto& vidx = mSectors[sec].rows[row]; + const auto* clarr = tpcClusterIdxStruct.clusters[sec][row]; + // use binary search to find 1st cluster with time >= tbmin + int ncl = vidx.size(), left = 0, right = ncl; + if (clIDDirect) { + clIDDirect->clear(); + } + while (left < right) { + int mid = left + (right - left) / 2; + if (clarr[vidx[mid]].getTime() < tbmin) { + left = mid + 1; + } else { + right = mid; + } + } + if (left == ncl || clarr[vidx[left]].getTime() > tbmax) { + return 0; // all clusters have time < tbmin or no clusters in the range [tbmin:tbmax] + } + int nclf = 0; + for (; left < ncl; left++) { // walk the time-sorted indices; `left` must advance on every iteration or the scan never terminates + const auto& cl = clarr[vidx[left]]; + if (cl.getTime() > tbmax) { + break; + } + if (cl.getPad() >= padmin && cl.getPad() <= padmax) { + nclf++; + if (clIDDirect) { + clIDDirect->push_back(vidx[left]); + } + } + } + return nclf; +} + +void TPCClusSelector::fill(const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct) +{ + for (int is = 0; is < NSectors; is++) { + auto& sect = mSectors[is]; +#ifdef WITH_OPENMP +#pragma omp parallel for schedule(dynamic) num_threads(mNThreads) +#endif + for (int ir = 0; ir < Sector::NRows; ir++) { + size_t ncl = tpcClusterIdxStruct.nClusters[is][ir]; + if (ncl >= 0xffff) { + LOGP(error, "Row {} of sector {} has {} clusters, truncating to {}", ir, is, ncl, int(0xffff)); + ncl = 0xffff; + } + auto& rowidx = sect.rows[ir]; + rowidx.resize(ncl); + std::iota(rowidx.begin(), rowidx.end(), 0); + const auto* clus = tpcClusterIdxStruct.clusters[is][ir]; // C array of clusters + std::sort(rowidx.begin(), rowidx.end(), [&](size_t a, size_t b) { return clus[a].getTime() < clus[b].getTime(); }); + } + } +} From 81b7a64680531129657f5a9eb2a222b3d0c779c1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025
19:06:16 +0200 Subject: [PATCH 0394/1764] GPU: Fix allocator / deallocator mismatch --- GPU/GPUTracking/Base/GPUReconstruction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index e0c866fd9421b..06f1c27fb6c06 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -378,7 +378,7 @@ class GPUReconstruction std::vector res; }; struct alignedDeleter { - void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; + void operator()(void* ptr) { ::operator delete[](ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; }; std::unordered_map mMemoryReuse1to1; std::vector> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCound, tag From b8cacf6b25dbb22b30160821d70992cfac594f8b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:16:30 +0200 Subject: [PATCH 0395/1764] GPU TPC Merger: Clarify more variable names --- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 4 +- .../Global/GPUChainTrackingMerger.cxx | 10 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 40 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 8 ++-- GPU/GPUTracking/SectorTracker/GPUTPCTrack.h | 3 -- 6 files changed, 32 insertions(+), 35 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index ec1636dfe7f59..61f8a614fbe6f 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -124,7 +124,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters = io.clustersNative->nClustersTotal; mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? 
mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; - mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include + mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NMergedTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index e9721ec9d12bf..173d2fb916239 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -154,7 +154,7 @@ void GPUChainTracking::PrintMemoryStatistics() } addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits)); addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks()); - addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters()); + addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters()); if (mRec->GetProcessingSettings().createO2Output) { addToMap("TPC O2 Tracks", usageMap, processors()->tpcMerger.NOutputTracksTPCO2(), processors()->tpcMerger.NOutputTracksTPCO2()); @@ -182,7 +182,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits()); } GPUInfo("MEMREL Tracks NCl %d NTrk %d", 
processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks()); - GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); + GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters()); } void GPUChainTracking::PrepareKernelDebugOutput() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 2b3d719a27dea..118f0bf73a845 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -261,9 +261,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (param().dodEdxEnabled) { GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } - GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { - GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NMergedTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0); } @@ -330,7 +330,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.nMergedTracks = Merger.NMergedTracks(); 
mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); - mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); + mIOPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment(); mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt(); mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2(); @@ -344,7 +344,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); - processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); + processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment(); processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt(); processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2(); @@ -355,7 +355,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } if (GetProcessingSettings().debugLevel >= 2) { - GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters()); + GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NMergedTrackClusters(), Merger.NClusters()); } return 0; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 1d5a7a0b1df47..16182464c12fe 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -372,9 +372,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks); } } - computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); + computePointerWithAlignment(mem, 
mClusters, mNMaxMergedTrackClusters); if (mRec->GetParam().par.earlyTpcTransform) { - computePointerWithAlignment(mem, mClustersXYZ, mNMaxOutputTrackClusters); + computePointerWithAlignment(mem, mClustersXYZ, mNMaxMergedTrackClusters); } computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters); return mem; @@ -446,7 +446,7 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxSingleSectorTracks = ntrk; } } - mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); + mNMaxMergedTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field } else { @@ -1354,14 +1354,14 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i continue; } - uint32_t newRef = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); - if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxOutputTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxOutputTrackClusters); - for (uint32_t k = newRef; k < mNMaxOutputTrackClusters; k++) { + uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); + if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters); + for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) { mClusters[k].num = 0; mClusters[k].state = 0; } - CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); return; } @@ 
-1711,20 +1711,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = nFilteredHits; } - const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); - if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters); - CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); + const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); + if (iMergedTrackFirstCluster >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); continue; } - GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHit* const cl = mClusters + iMergedTrackFirstCluster; for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; - GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1759,13 +1759,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetLooper(leg > 0); mergedTrack.SetLegs(leg); mergedTrack.SetNClusters(nHits); - mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster); + mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster); GPUTPCGMTrackParam& p1 = mergedTrack.Param(); const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX 
= Param().par.earlyTpcTransform ? mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1796,13 +1796,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (Param().rec.tpc.mergeCE) { bool CEside; if (Param().par.earlyTpcTransform) { - const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); } else { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? 
(nHits - 1) : 0] : nullptr, iOutputTrack); } } // itr } @@ -1855,7 +1855,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nBlocks * nThreads) { if (mSharedCount[mClusters[i].num] > 1) { mClusters[i].state |= GPUTPCGMMergedTrackHit::flagShared; } @@ -1876,7 +1876,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackSort[mTrackOrderAttach[i]] = i; } - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nThreads * nBlocks) { mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 4487b6d937dc2..54a541ebe0fd6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -70,7 +70,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nLoopData; GPUAtomic(uint32_t) nUnpackedTracks; GPUAtomic(uint32_t) nMergedTracks; - GPUAtomic(uint32_t) nOutputTrackClusters; + GPUAtomic(uint32_t) nMergedTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; const GPUTPCTrack* firstExtrapolatedTracks[NSECTORS]; @@ -113,8 +113,8 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() uint32_t NClusters() const { return mNClusters; } GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; } GPUhdi() uint32_t 
NMaxTracks() const { return mNMaxTracks; } - GPUhdi() uint32_t NMaxOutputTrackClusters() const { return mNMaxOutputTrackClusters; } - GPUhdi() uint32_t NOutputTrackClusters() const { return mMemory->nOutputTrackClusters; } + GPUhdi() uint32_t NMaxMergedTrackClusters() const { return mNMaxMergedTrackClusters; } + GPUhdi() uint32_t NMergedTrackClusters() const { return mMemory->nMergedTrackClusters; } GPUhdi() const GPUTPCGMMergedTrackHit* Clusters() const { return mClusters; } GPUhdi() GPUTPCGMMergedTrackHit* Clusters() { return (mClusters); } GPUhdi() const GPUTPCGMMergedTrackHitXYZ* ClustersXYZ() const { return mClustersXYZ; } @@ -249,7 +249,7 @@ class GPUTPCGMMerger : public GPUProcessor uint32_t mNTotalSectorTracks = 0; // maximum number of incoming sector tracks uint32_t mNMaxTracks = 0; // maximum number of output tracks uint32_t mNMaxSingleSectorTracks = 0; // max N tracks in one sector - uint32_t mNMaxOutputTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) + uint32_t mNMaxMergedTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) uint32_t mNMaxClusters = 0; // max total unique clusters (in event) uint32_t mNMaxLooperMatches = 0; // Maximum number of candidate pairs for looper matching diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h index 225f5f0e2c7ad..7306c84cf949c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h @@ -53,9 +53,6 @@ class GPUTPCTrack GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSectorOutCluster); } GPUhd() const GPUTPCTrack* GetNextTrack() const { return (const GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } GPUhd() GPUTPCTrack* NextTrack() { return (GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } - GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSectorOutCluster& v) { 
((GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)))[i] = v; } - GPUhd() const GPUTPCSectorOutCluster* OutTrackClusters() const { return (const GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)); } - GPUhd() const GPUTPCSectorOutCluster& OutTrackCluster(int32_t i) const { return OutTrackClusters()[i]; } private: int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array From c5498af7e0ed421d066b22fc36d34530bcd6c478 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:43:59 +0200 Subject: [PATCH 0396/1764] GPU TPC Merger: Fix out of bounds check --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 16182464c12fe..533e697cc5852 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1513,7 +1513,6 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUTPCGMSectorTrack* trackParts[kMaxParts]; for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0) { @@ -1712,7 +1711,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); - if (iMergedTrackFirstCluster >= mNMaxMergedTrackClusters) { + if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) { raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); continue; From 0a7f3df8bf7e24304a1d2882061f8589e746598f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 21:06:40 +0200 Subject: [PATCH 0397/1764] GPU: Add 
memoryScaling fuzzing debug option --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 8 ++++++ .../DataTypes/GPUMemorySizeScalers.cxx | 26 +++++++++++++++++++ .../DataTypes/GPUMemorySizeScalers.h | 11 +++++--- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- 6 files changed, 45 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index a4e5d5e1189f5..ad7a31cbd7470 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -304,7 +304,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings->rtc.optConstexpr = false; } - mMemoryScalers->factor = GetProcessingSettings().memoryScalingFactor; + mMemoryScalers->scalingFactor = GetProcessingSettings().memoryScalingFactor; mMemoryScalers->conservative = GetProcessingSettings().conservativeMemoryEstimate; mMemoryScalers->returnMaxVal = GetProcessingSettings().forceMaxMemScalers != 0; if (GetProcessingSettings().forceMaxMemScalers > 1) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 5f80a56e9e64e..2d1061616d907 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -216,6 +216,14 @@ int32_t GPUReconstructionCPU::ExitDevice() int32_t GPUReconstructionCPU::RunChains() { mMemoryScalers->temporaryFactor = 1.; + if (GetProcessingSettings().memoryScalingFuzz) { + static std::mt19937 rng; + static std::uniform_int_distribution dist(0, 1000000); + uint64_t fuzzFactor = GetProcessingSettings().memoryScalingFuzz == 1 ? 
dist(rng) : GetProcessingSettings().memoryScalingFuzz; + GPUInfo("Fuzzing memory scaling factor with %lu", fuzzFactor); + mMemoryScalers->fuzzScalingFactor(fuzzFactor); + } + mStatNEvents++; mNEventsProcessed++; diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx index 8b8fbc3ecae20..42ac2e8015f45 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx @@ -15,6 +15,8 @@ #include "GPUMemorySizeScalers.h" #include "GPULogging.h" +#include + using namespace o2::gpu; void GPUMemorySizeScalers::rescaleMaxMem(size_t newAvailableMemory) @@ -36,3 +38,27 @@ void GPUMemorySizeScalers::rescaleMaxMem(size_t newAvailableMemory) tpcMaxMergedTrackHits = (double)tmp.tpcMaxMergedTrackHits * scaleFactor; availableMemory = newAvailableMemory; } + +double GPUMemorySizeScalers::getScalingFactor() +{ + if (!doFuzzing) { + return scalingFactor; + } + static std::uniform_int_distribution dist(0, 1000000); + static std::mt19937 rng; + if (fuzzSeed) { + rng = std::mt19937(fuzzSeed); + fuzzLimit = dist(rng) / 10; + fuzzSeed = 0; + } + if (dist(rng) > fuzzLimit) { + return scalingFactor; + } + return scalingFactor * 0.000001 * dist(rng); +} + +void GPUMemorySizeScalers::fuzzScalingFactor(uint64_t seed) +{ + fuzzSeed = seed; + doFuzzing = true; +} diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h index e5012d86742f8..164ecb32c26c7 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h @@ -28,7 +28,9 @@ struct GPUMemorySizeScalers { size_t nITSTracks = 0; // General scaling factor - double factor = 1; + double scalingFactor = 1; + uint64_t fuzzSeed = 0; + uint64_t fuzzLimit = 0; double temporaryFactor = 1; bool conservative = 0; @@ -64,11 +66,14 @@ struct GPUMemorySizeScalers { size_t tpcMaxMergedTrackHits = 200000000; size_t availableMemory = 
20500000000; bool returnMaxVal = false; + bool doFuzzing = false; void rescaleMaxMem(size_t newAvailableMemory); + double getScalingFactor(); + void fuzzScalingFactor(uint64_t seed); inline size_t getValue(size_t maxVal, size_t val) { - return returnMaxVal ? maxVal : (std::min(maxVal, offset + val) * factor * temporaryFactor); + return returnMaxVal ? maxVal : (std::min(maxVal, offset + val) * (doFuzzing == 0 ? scalingFactor : getScalingFactor()) * temporaryFactor); } inline size_t NTPCPeaks(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxPeaks : (GPUCA_NSECTORS * tpcMaxPeaks), hitOffset + tpcDigits * tpcPeaksPerDigit); } @@ -81,7 +86,7 @@ struct GPUMemorySizeScalers { inline size_t NTPCSectorTrackHits(size_t tpcHits, uint8_t withRejection = 0) { return getValue(tpcMaxSectorTrackHits, tpcHits * (withRejection ? tpcSectorTrackHitsPerHitWithRejection : tpcSectorTrackHitsPerHit)); } inline size_t NTPCMergedTracks(size_t tpcSectorTracks) { return getValue(tpcMaxMergedTracks, tpcSectorTracks * (conservative ? 1.0 : tpcMergedTrackPerSectorTrack)); } inline size_t NTPCMergedTrackHits(size_t tpcSectorTrackHitss) { return getValue(tpcMaxMergedTrackHits, tpcSectorTrackHitss * tpcMergedTrackHitPerSectorHit); } - inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * factor * temporaryFactor); } + inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * (doFuzzing == 0 ? 
scalingFactor : getScalingFactor()) * temporaryFactor); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 12f40cda4c398..238994ee53af5 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -312,6 +312,7 @@ AddOption(memoryAllocationStrategy, int8_t, 0, "", 0, "Memory Allocation Strageg AddOption(forceMemoryPoolSize, uint64_t, 1, "memSize", 0, "Force size of allocated GPU / page locked host memory", min(0ul)) AddOption(forceHostMemoryPoolSize, uint64_t, 0, "hostMemSize", 0, "Force size of allocated host page locked host memory (overriding memSize)", min(0ul)) AddOption(memoryScalingFactor, float, 1.f, "", 0, "Factor to apply to all memory scalers") +AddOption(memoryScalingFuzz, uint64_t, 0, "", 0, "Fuzz the memoryScalingFactor (0 disable, 1 enable, >1 set seed", def(1)) AddOption(conservativeMemoryEstimate, bool, false, "", 0, "Use some more conservative defaults for larger buffers during TPC processing") AddOption(tpcInputWithClusterRejection, uint8_t, 0, "", 0, "Indicate whether the TPC input is CTF data with cluster rejection, to tune buffer estimations") AddOption(forceMaxMemScalers, uint64_t, 0, "", 0, "Force using the maximum values for all buffers, Set a value n > 1 to rescale all maximums to a memory size of n") diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 81eb2c285192b..f7e972315a739 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -110,7 +110,7 @@ int32_t GPUO2Interface::Initialize(const GPUO2InterfaceConfiguration& config) return (1); } if (!mCtx[i].mRec->IsGPU() && mCtx[i].mRec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - mCtx[i].mRec->MemoryScalers()->factor *= 2; + mCtx[i].mRec->MemoryScalers()->scalingFactor *= 2; } } if 
(mConfig->configProcessing.doublePipeline) { From 2673d512ffe9e1e1f658ace2ccf93ac799501b56 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 22 May 2025 11:17:08 +0200 Subject: [PATCH 0398/1764] Drop obsolete documentation (#14309) --- .cmake-format.py | 6 -- Algorithm/CMakeLists.txt | 3 - Algorithm/doc/Algorithm.3.in | 12 --- Algorithm/doc/algorithm_parser.3.in | 135 ---------------------------- CMakeLists.txt | 5 -- Examples/Ex5/CMakeLists.txt | 2 - Examples/Ex5/doc/ex5.7.in | 62 ------------- cmake/O2TargetManPage.cmake | 79 ---------------- doc/CMakeInstructions.md | 18 ---- doc/FairMQDevice.1.in | 64 ------------- doc/ManPages.md | 23 ----- doc/o2-timeframe-file-format.1.in | 27 ------ doc/o2.1.in | 19 ---- 13 files changed, 455 deletions(-) delete mode 100644 Algorithm/doc/Algorithm.3.in delete mode 100644 Algorithm/doc/algorithm_parser.3.in delete mode 100644 Examples/Ex5/doc/ex5.7.in delete mode 100644 cmake/O2TargetManPage.cmake delete mode 100644 doc/FairMQDevice.1.in delete mode 100644 doc/ManPages.md delete mode 100644 doc/o2-timeframe-file-format.1.in delete mode 100644 doc/o2.1.in diff --git a/.cmake-format.py b/.cmake-format.py index 9827eecd329c4..ae092bc09f363 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -66,12 +66,6 @@ "HEADERS": '*', } }, - "o2_target_man_page": { - "kwargs": { - "NAME": '+', - "SECTION": '*', - } - }, "add_root_dictionary": { "kwargs": { "LINKDEF": '+', diff --git a/Algorithm/CMakeLists.txt b/Algorithm/CMakeLists.txt index b245562c7cc93..ed7a42a96e528 100644 --- a/Algorithm/CMakeLists.txt +++ b/Algorithm/CMakeLists.txt @@ -11,9 +11,6 @@ o2_add_header_only_library(Algorithm INTERFACE_LINK_LIBRARIES O2::Headers) -o2_target_man_page(Algorithm NAME Algorithm SECTION 3) -o2_target_man_page(Algorithm NAME algorithm_parser SECTION 3) - o2_add_test(o2formatparser SOURCES test/o2formatparser.cxx COMPONENT_NAME Algorithm diff --git a/Algorithm/doc/Algorithm.3.in 
b/Algorithm/doc/Algorithm.3.in deleted file mode 100644 index eaf618ee68da2..0000000000000 --- a/Algorithm/doc/Algorithm.3.in +++ /dev/null @@ -1,12 +0,0 @@ -.\" Alice O2 manpage for module Algorithm -.TH "AliceO2" 3 "17 Jan 2017" "1.0" "Algorithm man page" - -.SH NAME -AliceO2 - module -.B Algorithm - -.SH DESCRIPTION -A collection of generic algorithms for Alice O2 - -.SH SEE ALSO -algorithm_parser(3) diff --git a/Algorithm/doc/algorithm_parser.3.in b/Algorithm/doc/algorithm_parser.3.in deleted file mode 100644 index 98f45df279669..0000000000000 --- a/Algorithm/doc/algorithm_parser.3.in +++ /dev/null @@ -1,135 +0,0 @@ -.\" Alice O2 manpage for parser algorithms -.TH "AliceO2" 3 "17 Jan 2017" "1.0" "Algorithm Parser man page" - -.SH NAME -AliceO2 - module -.B Algorithm -- data parsers - -.SH SYNOPSIS -.B ForwardParser< -.I SomeHeaderType -, -.I SomeTrailerType -.B > - -.B ReverseParser< -.I SomeHeaderType -, -.I SomeTrailerType -.B > - -.SS Public types -.TP 2 -// a compound of header, data, and trailer -.B struct FrameInfo { - using PtrT = const PayloadType*; - const HeaderType* header = nullptr; - const TrailerType* trailer = nullptr; - PtrT payload = nullptr; - size_t length = 0; - -.B }; - -.TP 2 -.B using CheckHeaderFct = std::function; -alias for callback checking the header, return true if the object is a valid header -.TP 2 -.B using CheckTrailerFct = std::function; -alias for callback checking the trailer -.TP 2 -.B using GetFrameSizeFct = std::function; -alias for callback to get the complete frame size including header, trailer and the data -.TP 2 -.B using InsertFct = std::function; -function callback to insert/handle one frame into, sequentially called for all frames if the whole block has a valid format - -.SS Public member functions -.TP 2 -.B template -.B int parse(const InputType* \fIbuffer\fB, size_t \fIbufferSize\fB, CheckHeaderFct \fIcheckHeader\fB, CheckTrailerFct \fIcheckTrailer\fB, GetFrameSizeFct \fIgetFrameSize\fB, InsertFct \fIinsert\fB) 
- -.SS Public member variables -.TP 2 -.B static const size_t headOffset = typesize::size; -the length offset due to header -.TP 2 -.B static const size_t tailOffset = typesize::size; -the length offset due to trailer -.TP 2 -.B static const size_t totalOffset = headOffset + tailOffset; -total length offset due to header and trailer - -.SH DESCRIPTION -Template utilities for parsing of data sequences. Each entry in the sequence consist of a header, variable payload, and optionally a trailer. The three parts are collected in the FrameInfo structure for every entry. - -Callback functions for checking header and trailer integrity, getting length of the current frame and handling of a frame. - -.SS ForwardParser -The size is expected to be part of the header, parsing starts at beginning of buffer. -Trailer type can be void, which is also the default template parameter. That -allows to define a frame consisting of only header and data. - -.SS ReverseParser -The size is expected to be part of the trailer, the parsing is thus in reverse direction. Also the insert callback is called with the entries starting form the end of the buffer. -An easy extension can be to reverse the order of the inserts, meaning that the entries are read from the beginning. - -.SH EXAMPLES -.SS ReverseParser example -.EX -using SomeParser = ReverseParser; -SomeParser parser; -std::vector frames; -parser.parse(ptr, size, - [] (const typename SomeParser::HeaderType& h) { - // check the header - return true; - }, - [] (const typename SomeParser::TrailerType& t) { - // check the trailer - return true; - }, - [] (const typename SomeParser::TrailerType& t) { - // get the size of the frame including payload - // and header and trailer size, e.g. 
payload size - // from a trailer member - return t.payloadSize + SomeParser::totalOffset; - }, - [&frames] (typename SomeParser::FrameInfo& info) { - frames.emplace_back(info); - return true; - } - ) -.EE - -.SS ForwardParser example with frame consisting of header and payload -.EX -using SomeParser = ForwardParser; -SomeParser parser; -std::vector frames; -parser.parse(ptr, size, - [] (const typename SomeParser::HeaderType& h) { - // check the header - return true; - }, - [] (const typename SomeParser::HeaderType& h) { - // get the size of the frame including payload - // and header and trailer size, e.g. payload size - // from a header member - return h.payloadSize + SomeParser::totalOffset; - }, - [&frames] (typename SomeParser::FrameInfo& info) { - frames.emplace_back(info); - return true; - } - ) -.EE - -.SH BUGS, CONTRIBUTIONS -Please add an issue to -.UR https://github.com/AliceO2Group/AliceO2/issues -.UE - -.SH SEE ALSO -.UR https://github.com/AliceO2Group/AliceO2/blob/dev/Algorithm/include/Algorithm/Parser.h -.UE diff --git a/CMakeLists.txt b/CMakeLists.txt index b71d05175e9e9..adecffc0f4dbf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,7 +86,6 @@ include(O2AddTestRootMacro) include(O2ReportNonTestedMacros) include(O2TargetRootDictionary) include(O2DataFile) -include(O2TargetManPage) include(O2AddWorkflow) include(O2SetROOTPCMDependencies) include(O2AddHipifiedExecutable) @@ -117,10 +116,6 @@ endif() add_subdirectory(config) -add_custom_target(man ALL) -o2_target_man_page(man NAME o2) -o2_target_man_page(man NAME FairMQDevice) - # Testing and packaging only needed if we are the top level directory if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) # Documentation diff --git a/Examples/Ex5/CMakeLists.txt b/Examples/Ex5/CMakeLists.txt index 3951709a8a050..f6216bcebdad2 100644 --- a/Examples/Ex5/CMakeLists.txt +++ b/Examples/Ex5/CMakeLists.txt @@ -12,5 +12,3 @@ o2_add_executable(ex5 SOURCES src/run.cxx COMPONENT_NAME example TARGETVARNAME 
targetName) - -o2_target_man_page(${targetName} NAME ex5 SECTION 7) diff --git a/Examples/Ex5/doc/ex5.7.in b/Examples/Ex5/doc/ex5.7.in deleted file mode 100644 index 5d346f7f4798d..0000000000000 --- a/Examples/Ex5/doc/ex5.7.in +++ /dev/null @@ -1,62 +0,0 @@ -.\" Manpage for ex5. - -.\" this file gives some basic introduction on how to use the -.\" roff format to write man pages -.\" NOTE: all formatting commands start with a dot and must be -.\" at the beginning of the line - -.\" the header section -.TH AliceO2 1 "07 July 2019" "1.0" "ex5 man page" - -.\" .SH starts a new section, NAME is the first section -.SH NAME - -ex5 - A simple example for AliceO2 submodules - -.\" next is the SYNOPSIS section -.SH SYNOPSIS - -.\" some bold formatted text -.B ex5 -.\" alternate between roman and bold font, separated by blank, i.e. the -.\" square backets in roman and the option in bold -.RB [ --someoption ] -.\" same here, in addition, the dots indicating the argument string are in -.\" italic. Note: the quoted " [" makes sure there is a blank -.RB [ --debug " [" --gdb=\fI...\fR ]] -.\" italic formatting (underline in man) -.I mandatory_argument -.\" alternate roman and italic -.RI [ further_arguments... ] - - -.SH DESCRIPTION - -ex5 is an example to demonstrate the AliceO2 cmake setup of -modules. This document illustrates creation of man pages. All options and -arument are pure fictive. - -.SH OPTIONS - -.\" indented paragraph with label, indentation is set to the optional number -.TP 5 -.B --someoption -This is a fancy option of the example. 
- -.TP 5 -.B --debug -Run everything with debugging options - -.TP 5 -.B --gdb=\fI...\fR -Add additional information to run with gdb - -.SH SEE ALSO - -ex5(1) - -http://gnustep.made-it.com/man-groff.html - -.SH BUGS - -No known bugs diff --git a/cmake/O2TargetManPage.cmake b/cmake/O2TargetManPage.cmake deleted file mode 100644 index 5d29447c52536..0000000000000 --- a/cmake/O2TargetManPage.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. - -include_guard() - -# Generate a man page -# -# Make sure we have nroff. If that is not the case we will not generate man -# pages -find_program(NROFF_FOUND nroff) - -function(o2_target_man_page target) - if(NOT NROFF_FOUND) - return() - endif() - cmake_parse_arguments(PARSE_ARGV - 1 - A - "" - "NAME;SECTION" - "") - - # check the target exists - if(NOT TARGET ${target}) - # try with out naming conventions - set(baseTargetName ${target}) - o2_name_target(${baseTargetName} NAME target) - if(NOT TARGET ${target}) - # not a library, maybe an executable ? - o2_name_target(${baseTargetName} NAME target IS_EXE) - if(NOT TARGET ${target}) - message(FATAL_ERROR "Target ${target} does not exist") - endif() - endif() - endif() - - if(NOT A_SECTION) - set(A_SECTION 1) - endif() - if(NOT A_NAME) - message( - FATAL_ERROR - "You must provide the name of the input man file in doc/.
.in" - ) - endif() - if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in) - message( - FATAL_ERROR - "Input file ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in does not exist" - ) - endif() - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in - COMMAND nroff - -Tascii - -man - ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in - > - ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - VERBATIM) - # the prefix man. for the target name avoids circular dependencies for the man - # pages added at top level. Simply droping the dependency for those does not - # invoke the custom command on all systems. - set(CUSTOM_TARGET_NAME man.${A_NAME}.${A_SECTION}) - add_custom_target(${CUSTOM_TARGET_NAME} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION}) - add_dependencies(${target} ${CUSTOM_TARGET_NAME}) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - DESTINATION ${CMAKE_INSTALL_DATADIR}/man/man${A_SECTION}) -endfunction() diff --git a/doc/CMakeInstructions.md b/doc/CMakeInstructions.md index c4f55140f611c..e0438c985c41d 100644 --- a/doc/CMakeInstructions.md +++ b/doc/CMakeInstructions.md @@ -349,24 +349,6 @@ Note as well that some (very few) tests are ran only for some configurations ctest -C RelWithDebInfo ``` -#### [Ex5](../Examples/Ex5) Adding a man page - -If a module provides one or more executables, it might be of interest for the users of those executables to have access to a man page for them. Ex5 illustates that use case. - - . 
- ├── CMakeLists.txt - ├── README.md - ├── doc - │   └── ex5.7.in - └── src - └── run.cxx - -The [man page](ManPages.md) is created using : - - o2_target_man_page([targetName] NAME ex5 SECTION 7) - -where `NAME xx` refers to a file `doc/xx.[SECTION].in`, and the actual `targetName` can be found from the base target name (ex5 in that case) using the [o2_name_target](../cmake/O2NameTarget.cmake) function. - ## CTest In the build directory of O2, if you launch the `ctest` command, all the O2 tests will be ran, which is not always what you want/need, in particular during development. diff --git a/doc/FairMQDevice.1.in b/doc/FairMQDevice.1.in deleted file mode 100644 index 4e33e3379ae82..0000000000000 --- a/doc/FairMQDevice.1.in +++ /dev/null @@ -1,64 +0,0 @@ -.\" Manpage for general FairMQ device properties and options. -.TH AliceO2 1 "10 Apr 2022" "1.1" "FairMQ device man page" -.SH NAME -fair::mq::Device - The basis for software devices in O2 - -.SH SYNOPSIS -.I deviceapp -.BI --id deviceid -.BI --mq-config " configfile" -| -.BI --channel-config " config" -| -.BI --config-json-string " arg" -| -.BI --config-xml-string " arg" - -.SH DESCRIPTION -All AliceO2 devices derive from fair::mq::Device which provides the -transport functionality. - -.SH OPTIONS -.SS Common FairMQ device options -.TP 2 -.BI --id " id" -A unique identifier of the device -.TP 2 -.BI --verbosity " arg " \fR(=DEBUG) -.RS -Verbosity level : -.B TRACE -.B DEBUG -.B RESULTS -.B INFO -.B WARN -.B ERROR -.B STATE -.B NOLOG -.RE - -.SS FairMQ device channel configuration -.TP 2 -.BI --config-xml-string " arg " -XML input as command line string. -.TP 2 -.BI --config-json-string " arg " -JSON input as command line string. -.TP 2 -.BI --mq-config " arg " -JSON/XML input as file. 
The configuration object will check xml or -json file extention and will call the json or xml parser accordingly -.TP 2 -.BI --channel-config " args " -channel configuration as comma separated key=\fIvalue\fR pairs -.RS -Valid Keys: -.B name -.B type -.B method -.B address -.B property -.RE - -.SH MORE OPTIONS -Use '\fIdeviceapp\fR --help' to get a full list of options. diff --git a/doc/ManPages.md b/doc/ManPages.md deleted file mode 100644 index 8fb7b48b07beb..0000000000000 --- a/doc/ManPages.md +++ /dev/null @@ -1,23 +0,0 @@ - - -# Man Pages - -You can create man pages in nroff format under: - - Subsystem/Module/docs/.
.in - -and it will create a man page for you in: - - ${CMAKE_BINARY_DIR}/stage/share/man/man
- -if you add: - - o2_target_man_page(target NAME SECTION
) - -to your `CMakeLists.txt`. Note the man page is "attached" to a given target. -If `SECTION` is omitted it will default to 1 -(executables). For more informantion about nroff format you can look at: - - http://www.linuxjournal.com/article/1158 diff --git a/doc/o2-timeframe-file-format.1.in b/doc/o2-timeframe-file-format.1.in deleted file mode 100644 index df36ff7256e33..0000000000000 --- a/doc/o2-timeframe-file-format.1.in +++ /dev/null @@ -1,27 +0,0 @@ -.\" Manpage for O2. -.TH man 1 "19 May 2017" "1.0" "Alice O2 Timeframe Format" - -.SH DESCRIPTION - -O2 is Alice next generation software framework to be used for RUN3. This is a -quick desctiption of the timeframe file format as dumped by -o2-timeframe-writer-device and read by the o2-timeframe-reader-device. - -The file format is simply a dump of the timeframe on disk. Multiple timeframes -can be concatenated resulting in a valid file. The format is as follow: - -o2tf: Timeframe [Timeframe [..]] -Timeframe: Subtimeframe [Subtimeframe [...]] TimeframeIndex -Subtimeframe: Header Payload -Header: DataHeader derived header stack -Payload: binary blob -TimeframeIndex: IndexElement [IndexElement [..]] -IndexElement: DataHeader Payload -Position in timeframe: int (4 bytes) -DataHeader: only the DataHeader part -Payload: binary blob - -.SH DISCLAIMER - -Notice that this file format is a work in progress and cannot be used for -anything but debugging purposes. diff --git a/doc/o2.1.in b/doc/o2.1.in deleted file mode 100644 index 57d74acf1640d..0000000000000 --- a/doc/o2.1.in +++ /dev/null @@ -1,19 +0,0 @@ -.\" Manpage for O2. -.TH man 1 "19 May 2017" "1.0" "Alice O2 man page" - -.SH NAME - -O2 is Alice next generation software framework to be used for RUN3. 
- -.SH DEVICES - -o2-alicehlt-wrapper-device(1), o2-subframebuilder-device(1) - -.\.SH TOOLS - -.SH SEE ALSO -FairMQDevice(1) - -.SH BUGS - -No bugs whatsoever From b41a2a1e13cd537654c05d5dedb25b9c63e473da Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 14:44:22 +0200 Subject: [PATCH 0399/1764] Update GPU documentation build-standalone.md --- GPU/documentation/build-standalone.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/GPU/documentation/build-standalone.md b/GPU/documentation/build-standalone.md index 891d16b4dc2c4..bf84fd3edf0fb 100644 --- a/GPU/documentation/build-standalone.md +++ b/GPU/documentation/build-standalone.md @@ -55,9 +55,20 @@ An example line would .e.g. be ``` Some other noteworthy options are `--display` to run the GPU event display, `--qa` to run a QA task on MC data, `--runs` and `--runs2` to run multiple iterations of the benchmark, `--printSettings` to print all the settings that were used, `--memoryStat` to print memory statistics, `--sync` to run with settings for online reco, `--syncAsync` to run online reco first, and then offline reco on the produced TPC CTF data, `--setO2Settings` to use some defaults as they are in O2 not in the standalone version, `--PROCdoublePipeline` to enable the double-threaded pipeline for best performance (works only with multiple iterations, and not in async mode), and `--RTCenable` to enable the run time compilation improvements (check also `--RTCcacheOutput`). -An example for a benchmark in online mode would be: +With `--memSize` you can control the amount of GPU memory to use, and with `--inputMemory` and `--outputMemory` GPU-registered input/output buffers can be preallocated (as is the SHM memory when running in O2). 
+An example for a benchmark that runs with the same settings as in online data taking would be: ``` -./ca -e o2-pbpb-100 -g --sync --setO2Settings --PROCdoublePipeline --RTCenable --runs 10 +./ca -e o2-pbpb-100 -g --gpuType HIP --sync --setO2Settings --PROCdoublePipeline --RTCenable --runs 10 --memSize 15000000000 --inputMemory 6000000000 --outputMemory 10000000000 +``` + +For setting a GPU device, you can use the `--gpuDevice` option with the GPU index. +For ROCm with many GPUs, however, like on the EPNs with 8 GPUs, it is better to set the `ROCR_VISIBLE_DEVICES` env variable to the GPU you want to use. +MAKE SURE TO CHECK IF IT IS ALREADY SET BY SLURM WHEN YOU GET THE NODE!!! IN THAT CASE, USE ONLY THE GPUS ASSIGNED TO YOU BY SLURM! + +Finally, also NUMA pinning can play a role. On the EPN, you should use memory and GPUs and CPU cores from the same NUMA domain. +For a reaslistic benchmark using GPU 0 on the EPNs, please use: +``` +ROCR_VISIBLE_DEVICES=0 numactl --membind 0 --cpunodebind 0 ./ca -e o2-pbpb-100 --gpuType HIP --memSize 15000000000 --inputMemory 6000000000 --outputMemory 10000000000 --sync --runs 10 --RTCenable --setO2Settings --PROCdoublePipeline ``` # Generating a dataset @@ -84,3 +95,5 @@ To dump standalone data from CTF raw data in `myctf.root`, you can use the same ``` CTFINPUT=1 INPUT_FILE_LIST=myctf.root CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.dump=1;" WORKFLOW_DETECTORS=TPC SHMSIZE=16000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh ``` + +On the EPNs, you can find some reference data sets at `/home/drohr/standalone/events`. 
From 010f8676d1c1e4849281bc18405192dea0c9bc58 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Wed, 21 May 2025 09:47:17 +0200 Subject: [PATCH 0400/1764] dpl-workflow.sh: using MI100 serialization workaround by default again, with option to disable it with env var --- prodtests/full-system-test/dpl-workflow.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 4e6cbbebe7db7..dd575099857f4 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -270,6 +270,7 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi + [[ $EPNSYNCMODE == 1 || ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && [[ ${DISABLE_MI100_SERIALIZATION:-0} != 1 ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From b6f15f87e212a896e8c56f3cf475d1ef2c677889 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 13:53:13 +0200 Subject: [PATCH 0401/1764] GPU RTC: Add keepTempFiles option --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 6 ++++-- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index c8e5420a8bcf3..907bd08779ec2 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -420,8 +420,10 @@ void GPUReconstructionCUDA::genAndLoadRTC() mInternals->kernelModules.emplace_back(std::make_unique()); GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } - 
remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); - remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); + if (!GetProcessingSettings().rtctech.keepTempFiles) { + remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); + remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); + } } if (GetProcessingSettings().rtctech.runTest == 2) { return; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 238994ee53af5..8dabd17f95d23 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -233,6 +233,7 @@ AddOption(cacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the c AddOption(prependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(overrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") // Part of cmdLine, so checked against the cache AddOption(loadLaunchBoundsFromFile, std::string, "", "", 0, "Load a parameter object containing the launch bounds from a file") +AddOption(keepTempFiles, bool, false, "", 0, "Keep temporary source and object files") AddHelp("help", 'h') EndConfig() From e060099977dac22c9eea41bc27234e45ee590a27 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 10:16:48 +0200 Subject: [PATCH 0402/1764] GPU: Fix CUDA GetMemInfo must use correct device and simplify context creation / cleanup --- .../Base/cuda/GPUReconstructionCUDA.cu | 35 +++++++------------ 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 907bd08779ec2..0d5666b8ee790 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -125,34 +125,25 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } 
std::vector devicesOK(count, false); std::vector devMemory(count, 0); - bool contextCreated = false; + std::vector contextCreated(count, false); for (int32_t i = 0; i < count; i++) { if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Examining device %d", i); } size_t free, total; -#ifndef __HIPCC__ // CUDA - if (GPUChkErrI(cudaInitDevice(i, 0, 0))) { -#else // HIP - if (GPUChkErrI(hipSetDevice(i))) { -#endif + if (GPUChkErrI(cudaSetDevice(i))) { if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); } continue; } - contextCreated = true; + contextCreated[i] = true; if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! Skipping it.", i); } - GPUChkErr(cudaDeviceReset()); continue; } - if (count > 1) { - GPUChkErr(cudaDeviceReset()); - contextCreated = false; - } if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } @@ -212,13 +203,20 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() bestDevice = GetProcessingSettings().deviceNum; } } - if (noDevice) { - if (contextCreated) { + for (int32_t i = 0; i < count; i++) { + if (contextCreated[i] && (noDevice || i != bestDevice)) { + GPUChkErrI(cudaSetDevice(i)); GPUChkErrI(cudaDeviceReset()); } + } + if (noDevice) { return (1); } mDeviceId = bestDevice; + if (GPUChkErrI(cudaSetDevice(mDeviceId))) { + GPUError("Could not set CUDA Device!"); + return (1); + } GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); @@ -262,15 +260,6 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } #endif -#ifndef __HIPCC__ // CUDA - if (contextCreated == 0 && GPUChkErrI(cudaInitDevice(mDeviceId, 0, 0))) { -#else // HIP - if (contextCreated == 0 && GPUChkErrI(hipSetDevice(mDeviceId))) { -#endif - GPUError("Could not set CUDA Device!"); - return (1); - } - #ifndef __HIPCC__ // CUDA if 
(GPUChkErrI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { GPUError("Error setting CUDA stack size"); From 52937edaf56f75f1e347141e4105a31a95c55fc9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 13:43:20 +0200 Subject: [PATCH 0403/1764] GPU Display: Store pointer to GPUSettingsProcessing, so we do not need to copy debugLevel to GPUParam --- .../Base/GPUReconstructionTimeframe.cxx | 3 +-- .../Interface/GPUO2InterfaceDisplay.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- GPU/GPUTracking/display/GPUDisplay.cxx | 18 ++++++++++-------- GPU/GPUTracking/display/GPUDisplay.h | 8 +++++++- .../display/GPUDisplayInterface.cxx | 4 ++-- GPU/GPUTracking/display/GPUDisplayInterface.h | 14 ++++++++++++-- .../backend/GPUDisplayBackendOpenGL.cxx | 2 +- .../backend/GPUDisplayBackendVulkan.cxx | 8 ++++---- .../frontend/GPUDisplayFrontendWayland.cxx | 6 +++--- .../display/helpers/GPUDisplayLoader.cxx | 15 +++++++++++++-- .../display/render/GPUDisplayDraw.cxx | 2 +- GPU/GPUTracking/qa/GPUQA.cxx | 2 +- GPU/GPUTracking/qa/genEvents.cxx | 3 +-- 14 files changed, 58 insertions(+), 31 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx index b25b93e957b15..fefcd0ac925fe 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx @@ -25,14 +25,13 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" #include "GPUO2DataTypes.h" +#include "GPUSettings.h" #include #include #include #include -#include "utils/qconfig.h" - using namespace o2::gpu; namespace o2::gpu diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx index f84f29d826f1d..60d5eaf9ae162 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx @@ -35,7 +35,7 @@ 
GPUO2InterfaceDisplay::GPUO2InterfaceDisplay(const GPUO2InterfaceConfiguration* mQA.reset(new GPUQA(nullptr, &config->configQA, mParam.get())); mQA->InitO2MCData(); } - mDisplay.reset(GPUDisplayInterface::getDisplay(mFrontend.get(), nullptr, mQA.get(), mParam.get(), &mConfig->configCalib, &mConfig->configDisplay)); + mDisplay.reset(GPUDisplayInterface::getDisplay(mFrontend.get(), nullptr, mQA.get(), mParam.get(), &mConfig->configCalib, &mConfig->configDisplay, &mConfig->configProcessing)); } GPUO2InterfaceDisplay::~GPUO2InterfaceDisplay() = default; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index aed42e4f98f0c..9fb12432e763a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -42,7 +42,7 @@ #include "GPUGetConstexpr.h" #ifdef GPUCA_CADEBUG_ENABLED -#include "../utils/qconfig.h" +#include "GPUSettings.h" #include "AliHLTTPCClusterMCData.h" #endif diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 5b0960919da15..136b1947f60ee 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -34,7 +34,7 @@ #include "GPUTPCTracker.h" #include "GPUTPCGMMergedTrack.h" #include "GPUO2DataTypes.h" -#include "utils/qconfig.h" +#include "GPUSettings.h" #include "frontend/GPUDisplayFrontend.h" #include "backend/GPUDisplayBackend.h" @@ -44,17 +44,19 @@ constexpr hmm_mat4 MY_HMM_IDENTITY = {{{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, using namespace o2::gpu; -static const GPUSettingsDisplay& GPUDisplay_GetConfig(GPUChainTracking* chain) +const GPUSettingsDisplay& GPUDisplay::GetConfig(GPUChainTracking* chain) { static GPUSettingsDisplay defaultConfig; - if (chain && chain->mConfigDisplay) { - return *chain->mConfigDisplay; - } else { - return defaultConfig; - } + return (chain && chain->mConfigDisplay) ? 
*chain->mConfigDisplay : defaultConfig; +} + +const GPUSettingsProcessing& GPUDisplay::GetProcessingConfig(GPUChainTracking* chain) +{ + static GPUSettingsProcessing defaultConfig; + return chain ? chain->GetProcessingSettings() : defaultConfig; } -GPUDisplay::GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config) : GPUDisplayInterface(), mFrontend(frontend), mChain(chain), mConfig(config ? *config : GPUDisplay_GetConfig(chain)), mQA(qa) +GPUDisplay::GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config, const GPUSettingsProcessing* proc) : GPUDisplayInterface(), mFrontend(frontend), mChain(chain), mConfig(config ? *config : GetConfig(chain)), mProcessingSettings(proc ? *proc : GetProcessingConfig(chain)), mQA(qa) { mParam = param ? param : &mChain->GetParam(); mCalib = calib; diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index dbd90020698b2..06977c26e0b63 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -18,6 +18,7 @@ #include "frontend/GPUDisplayFrontend.h" #include "backend/GPUDisplayBackend.h" #include "GPUDisplayInterface.h" +#include "GPUSettings.h" #include "../utils/vecpod.h" #include "../utils/qsem.h" @@ -37,7 +38,7 @@ class GPUTRDGeometry; class GPUDisplay : public GPUDisplayInterface { public: - GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param = nullptr, const GPUCalibObjectsConst* calib = nullptr, const GPUSettingsDisplay* config = nullptr); + GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param = nullptr, const GPUCalibObjectsConst* calib = nullptr, const GPUSettingsDisplay* config = nullptr, const GPUSettingsProcessing* proc = nullptr); 
GPUDisplay(const GPUDisplay&) = delete; ~GPUDisplay() override = default; @@ -71,6 +72,7 @@ class GPUDisplay : public GPUDisplayInterface }; vecpod* vertexBuffer() { return mVertexBuffer; } const GPUParam* param() { return mParam; } + const GPUSettingsProcessing& GetProcessingSettings() const { return mProcessingSettings; } GPUDisplayFrontend* frontend() { return mFrontend; } bool drawTextInCompatMode() const { return mDrawTextInCompatMode; } int32_t& drawTextFontSize() { return mDrawTextFontSize; } @@ -140,6 +142,9 @@ class GPUDisplay : public GPUDisplayInterface bool mVerbose = false; }; + static const GPUSettingsDisplay& GetConfig(GPUChainTracking* chain); + static const GPUSettingsProcessing& GetProcessingConfig(GPUChainTracking* chain); + void DrawGLScene_internal(float animateTime = -1.f, bool renderToMixBuffer = false); void DrawGLScene_updateEventData(); void DrawGLScene_cameraAndAnimation(float animateTime, float& mixSlaveImage, hmm_mat4& nextViewMatrix); @@ -214,6 +219,7 @@ class GPUDisplay : public GPUDisplayInterface GPUSettingsDisplayLight mCfgL; GPUSettingsDisplayHeavy mCfgH; GPUSettingsDisplayRenderer mCfgR; + const GPUSettingsProcessing& mProcessingSettings; GPUQA* mQA; qSem mSemLockDisplay; diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.cxx b/GPU/GPUTracking/display/GPUDisplayInterface.cxx index 2f5cc9cbb5dd5..2eddef998fa8b 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.cxx +++ b/GPU/GPUTracking/display/GPUDisplayInterface.cxx @@ -65,9 +65,9 @@ static void* loadUnloadLib(bool load) return nullptr; } -GPUDisplayInterface* GPUDisplayInterface::getDisplay(GPUDisplayFrontendInterface* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config) +GPUDisplayInterface* GPUDisplayInterface::getDisplay(GPUDisplayFrontendInterface* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* 
config, const GPUSettingsProcessing* proc) { - std::tuple args = {frontend, chain, qa, param, calib, config}; + std::tuple args = {frontend, chain, qa, param, calib, config, proc}; auto func = (GPUDisplayInterface * (*)(const char*, void*)) loadUnloadLib(true); return func ? func("display", &args) : nullptr; } diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.h b/GPU/GPUTracking/display/GPUDisplayInterface.h index 3c6928c78e5a1..574a8cffc71f0 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.h +++ b/GPU/GPUTracking/display/GPUDisplayInterface.h @@ -15,7 +15,7 @@ #ifndef GPUDISPLAYINTERFACE_H #define GPUDISPLAYINTERFACE_H -#include "GPUSettings.h" +#include namespace o2::gpu { @@ -23,6 +23,16 @@ namespace o2::gpu class GPUChainTracking; class GPUQA; struct GPUParam; +struct GPUTrackingInOutPointers; +template +struct ConstPtr; +template